From 4627cef8c97c988cf0a11d3b50e8c5514f8dd1f8 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Fri, 10 Apr 2026 22:35:30 +0300 Subject: [PATCH 001/139] ci: use pandoc/actions/setup instead of apt-get apt-get install pandoc took ~27 minutes due to apt index refresh. The prebuilt binary action completes in seconds. Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/static.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/static.yml b/.github/workflows/static.yml index 90e7f35..84a76db 100644 --- a/.github/workflows/static.yml +++ b/.github/workflows/static.yml @@ -32,7 +32,7 @@ jobs: - name: Checkout uses: actions/checkout@v6 - name: Install pandoc - run: sudo apt-get install -y pandoc + uses: pandoc/actions/setup@v1 - name: Generate blog HTML run: make blog - name: Setup Pages -- 2.34.1 From a96b84fdebd758874836d9e5f3de60cad84c083b Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Fri, 10 Apr 2026 23:16:59 +0300 Subject: [PATCH 002/139] ci: use pandoc/actions/setup instead of apt-get (#76) apt-get install pandoc took ~27 minutes due to apt index refresh. The prebuilt binary action completes in seconds. Co-authored-by: Claude Opus 4.6 (1M context) --- .github/workflows/static.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/static.yml b/.github/workflows/static.yml index 90e7f35..84a76db 100644 --- a/.github/workflows/static.yml +++ b/.github/workflows/static.yml @@ -32,7 +32,7 @@ jobs: - name: Checkout uses: actions/checkout@v6 - name: Install pandoc - run: sudo apt-get install -y pandoc + uses: pandoc/actions/setup@v1 - name: Generate blog HTML run: make blog - name: Setup Pages -- 2.34.1 From 8abcd91f95d805eb36782b2837851579d8ec2c4f Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 11 Apr 2026 00:26:58 +0300 Subject: [PATCH 003/139] feat: multi-forwarder with SRTT-based failover (#77) * feat: multi-forwarder with SRTT-based failover address accepts string or array, with optional per-server port override. New fallback pool tried only when all primaries fail. Sequential failover with SRTT ranking ensures fastest upstream is tried first. Closes #34 (items 1, 2, 3) Co-Authored-By: Claude Opus 4.6 (1M context) * refactor: simplify failover candidate list and deduplicate recursive pool Co-Authored-By: Claude Opus 4.6 * refactor: extract maybe_update_primary for testable upstream re-detection Co-Authored-By: Claude Opus 4.6 * style: rustfmt Co-Authored-By: Claude Opus 4.6 --------- Co-authored-by: Claude Opus 4.6 (1M context) --- src/api.rs | 18 ++-- src/config.rs | 66 ++++++++++++-- src/ctx.rs | 12 +-- src/dot.rs | 5 +- src/forward.rs | 241 ++++++++++++++++++++++++++++++++++++++++++++++++- src/main.rs | 96 ++++++++------------ 6 files changed, 357 insertions(+), 81 deletions(-) diff --git a/src/api.rs b/src/api.rs index 2e66931..a0bae58 100644 --- a/src/api.rs +++ b/src/api.rs @@ -411,9 +411,12 @@ async fn diagnose( } // Check upstream (async, no locks held) - let upstream = ctx.upstream.lock().unwrap().clone(); - let (upstream_matched, upstream_detail) = - forward_query_for_diagnose(&domain_lower, &upstream, ctx.timeout).await; + let upstream = ctx.upstream_pool.lock().unwrap().preferred().cloned(); + let (upstream_matched, upstream_detail) = if let Some(ref u) = upstream { + forward_query_for_diagnose(&domain_lower, u, ctx.timeout).await + } else { + (false, "no upstream configured".to_string()) + }; steps.push(DiagnoseStep { source: "upstream".to_string(), matched: upstream_matched, @@ -520,7 +523,7 @@ async fn stats(State(ctx): State>) -> Json { let upstream = if ctx.upstream_mode == crate::config::UpstreamMode::Recursive { "recursive (root hints)".to_string() } else { - ctx.upstream.lock().unwrap().to_string() + ctx.upstream_pool.lock().unwrap().label() }; Json(StatsResponse { @@ -1016,8 +1019,11 @@ mod tests { services: Mutex::new(crate::service_store::ServiceStore::new()), lan_peers: Mutex::new(crate::lan::PeerStore::new(90)), forwarding_rules: Vec::new(), - upstream: Mutex::new(crate::forward::Upstream::Udp( - "127.0.0.1:53".parse().unwrap(), + upstream_pool: Mutex::new(crate::forward::UpstreamPool::new( + vec![crate::forward::Upstream::Udp( + "127.0.0.1:53".parse().unwrap(), + )], + vec![], )), upstream_auto: false, upstream_port: 53, diff --git a/src/config.rs b/src/config.rs index 9373d33..fa794d7 100644 --- a/src/config.rs +++ b/src/config.rs @@ -97,10 +97,12 @@ impl UpstreamMode { pub struct UpstreamConfig { #[serde(default)] pub mode: UpstreamMode, - #[serde(default = "default_upstream_addr")] - pub address: String, + #[serde(default, deserialize_with = "string_or_vec")] + pub address: Vec, #[serde(default = "default_upstream_port")] pub port: u16, + #[serde(default)] + pub fallback: Vec, #[serde(default = "default_timeout_ms")] pub timeout_ms: u64, #[serde(default = "default_root_hints")] @@ -115,8 +117,9 @@ impl Default for UpstreamConfig { fn default() -> Self { UpstreamConfig { mode: UpstreamMode::default(), - address: default_upstream_addr(), + address: Vec::new(), port: default_upstream_port(), + fallback: Vec::new(), timeout_ms: default_timeout_ms(), root_hints: default_root_hints(), prime_tlds: default_prime_tlds(), @@ -125,6 +128,33 @@ impl Default for UpstreamConfig { } } +fn string_or_vec<'de, D>(deserializer: D) -> std::result::Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + struct Visitor; + impl<'de> serde::de::Visitor<'de> for Visitor { + type Value = Vec; + fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.write_str("string or array of strings") + } + fn visit_str(self, v: &str) -> std::result::Result { + Ok(vec![v.to_string()]) + } + fn visit_seq>( + self, + mut seq: A, + ) -> std::result::Result { + let mut v = Vec::new(); + while let Some(s) = seq.next_element::()? { + v.push(s); + } + Ok(v) + } + } + deserializer.deserialize_any(Visitor) +} + fn default_true() -> bool { true } @@ -202,9 +232,6 @@ fn default_root_hints() -> Vec { ] } -fn default_upstream_addr() -> String { - String::new() // empty = auto-detect from system resolver -} fn default_upstream_port() -> u16 { 53 } @@ -525,6 +552,33 @@ mod tests { assert!(config.services[0].routes[0].strip); assert!(!config.services[0].routes[1].strip); // default false } + + #[test] + fn address_string_parses_to_vec() { + let config: Config = toml::from_str("[upstream]\naddress = \"1.2.3.4\"").unwrap(); + assert_eq!(config.upstream.address, vec!["1.2.3.4"]); + } + + #[test] + fn address_array_parses() { + let config: Config = + toml::from_str("[upstream]\naddress = [\"1.2.3.4\", \"5.6.7.8:5353\"]").unwrap(); + assert_eq!(config.upstream.address, vec!["1.2.3.4", "5.6.7.8:5353"]); + } + + #[test] + fn fallback_parses() { + let config: Config = + toml::from_str("[upstream]\nfallback = [\"8.8.8.8\", \"1.1.1.1\"]").unwrap(); + assert_eq!(config.upstream.fallback, vec!["8.8.8.8", "1.1.1.1"]); + } + + #[test] + fn empty_address_gives_empty_vec() { + let config: Config = toml::from_str("").unwrap(); + assert!(config.upstream.address.is_empty()); + assert!(config.upstream.fallback.is_empty()); + } } pub struct ConfigLoad { diff --git a/src/ctx.rs b/src/ctx.rs index 6b774eb..b4e0777 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -16,7 +16,7 @@ use crate::blocklist::BlocklistStore; use crate::buffer::BytePacketBuffer; use crate::cache::{DnsCache, DnssecStatus}; use crate::config::{UpstreamMode, ZoneMap}; -use crate::forward::{forward_query, Upstream}; +use crate::forward::{forward_query, forward_with_failover, Upstream, UpstreamPool}; use crate::header::ResultCode; use crate::health::HealthMeta; use crate::lan::PeerStore; @@ -42,7 +42,7 @@ pub struct ServerCtx { pub services: Mutex, pub lan_peers: Mutex, pub forwarding_rules: Vec, - pub upstream: Mutex, + pub upstream_pool: Mutex, pub upstream_auto: bool, pub upstream_port: u16, pub lan_ip: Mutex, @@ -220,12 +220,8 @@ pub async fn resolve_query( } (resp, path, DnssecStatus::Indeterminate) } else { - let upstream = - match crate::system_dns::match_forwarding_rule(&qname, &ctx.forwarding_rules) { - Some(addr) => Upstream::Udp(addr), - None => ctx.upstream.lock().unwrap().clone(), - }; - match forward_query(&query, &upstream, ctx.timeout).await { + let pool = ctx.upstream_pool.lock().unwrap().clone(); + match forward_with_failover(&query, &pool, &ctx.srtt, ctx.timeout).await { Ok(resp) => { ctx.cache.write().unwrap().insert(&qname, qtype, &resp); (resp, QueryPath::Forwarded, DnssecStatus::Indeterminate) diff --git a/src/dot.rs b/src/dot.rs index 3ed47ba..0d48fa2 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -362,7 +362,10 @@ mod tests { services: Mutex::new(crate::service_store::ServiceStore::new()), lan_peers: Mutex::new(crate::lan::PeerStore::new(90)), forwarding_rules: Vec::new(), - upstream: Mutex::new(crate::forward::Upstream::Udp(upstream_addr)), + upstream_pool: Mutex::new(crate::forward::UpstreamPool::new( + vec![crate::forward::Upstream::Udp(upstream_addr)], + vec![], + )), upstream_auto: false, upstream_port: 53, lan_ip: Mutex::new(std::net::Ipv4Addr::LOCALHOST), diff --git a/src/forward.rs b/src/forward.rs index ea2b03e..78efcb9 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -1,12 +1,14 @@ use std::fmt; -use std::net::SocketAddr; -use std::time::Duration; +use std::net::{IpAddr, SocketAddr}; +use std::sync::RwLock; +use std::time::{Duration, Instant}; use tokio::net::UdpSocket; use tokio::time::timeout; use crate::buffer::BytePacketBuffer; use crate::packet::DnsPacket; +use crate::srtt::SrttCache; use crate::Result; #[derive(Clone)] @@ -37,6 +39,133 @@ impl fmt::Display for Upstream { } } +pub fn parse_upstream_addr(s: &str, default_port: u16) -> std::result::Result { + // Try full socket addr first: "1.2.3.4:5353" or "[::1]:5353" + if let Ok(addr) = s.parse::() { + return Ok(addr); + } + // Bare IP: "1.2.3.4" or "::1" + if let Ok(ip) = s.parse::() { + return Ok(SocketAddr::new(ip, default_port)); + } + Err(format!("invalid upstream address: {}", s)) +} + +pub fn parse_upstream(s: &str, default_port: u16) -> Result { + if s.starts_with("https://") { + let client = reqwest::Client::builder() + .use_rustls_tls() + .build() + .unwrap_or_default(); + return Ok(Upstream::Doh { + url: s.to_string(), + client, + }); + } + let addr = parse_upstream_addr(s, default_port)?; + Ok(Upstream::Udp(addr)) +} + +#[derive(Clone)] +pub struct UpstreamPool { + primary: Vec, + fallback: Vec, +} + +impl UpstreamPool { + pub fn new(primary: Vec, fallback: Vec) -> Self { + Self { primary, fallback } + } + + pub fn preferred(&self) -> Option<&Upstream> { + self.primary.first().or(self.fallback.first()) + } + + pub fn set_primary(&mut self, primary: Vec) { + self.primary = primary; + } + + /// Update the primary upstream if `new_addr` (parsed with `port`) differs + /// from the current preferred upstream. Returns `true` if the pool changed. + pub fn maybe_update_primary(&mut self, new_addr: &str, port: u16) -> bool { + let Ok(new_sock) = format!("{}:{}", new_addr, port).parse::() else { + return false; + }; + let new_upstream = Upstream::Udp(new_sock); + if self.preferred() == Some(&new_upstream) { + return false; + } + self.primary = vec![new_upstream]; + true + } + + pub fn label(&self) -> String { + match self.preferred() { + Some(u) => { + let total = self.primary.len() + self.fallback.len(); + if total > 1 { + format!("{} (+{} more)", u, total - 1) + } else { + u.to_string() + } + } + None => "none".to_string(), + } + } +} + +pub async fn forward_with_failover( + query: &DnsPacket, + pool: &UpstreamPool, + srtt: &RwLock, + timeout_duration: Duration, +) -> Result { + // Build candidate list: primary (sorted by SRTT for UDP) then fallback + let mut candidates: Vec<(usize, u64)> = pool + .primary + .iter() + .enumerate() + .map(|(i, u)| { + let rtt = match u { + Upstream::Udp(addr) => srtt.read().unwrap().get(addr.ip()), + _ => 0, // DoH: keep config order (stable sort preserves it) + }; + (i, rtt) + }) + .collect(); + candidates.sort_by_key(|&(_, rtt)| rtt); + + let all_upstreams: Vec<&Upstream> = candidates + .iter() + .map(|&(i, _)| &pool.primary[i]) + .chain(pool.fallback.iter()) + .collect(); + + let mut last_err: Option> = None; + + for upstream in &all_upstreams { + let start = Instant::now(); + match forward_query(query, upstream, timeout_duration).await { + Ok(resp) => { + if let Upstream::Udp(addr) = upstream { + let rtt_ms = start.elapsed().as_millis() as u64; + srtt.write().unwrap().record_rtt(addr.ip(), rtt_ms, false); + } + return Ok(resp); + } + Err(e) => { + if let Upstream::Udp(addr) = upstream { + srtt.write().unwrap().record_failure(addr.ip()); + } + log::debug!("upstream {} failed: {}", upstream, e); + last_err = Some(e); + } + } + } + + Err(last_err.unwrap_or_else(|| "no upstream configured".into())) +} + pub async fn forward_query( query: &DnsPacket, upstream: &Upstream, @@ -271,4 +400,112 @@ mod tests { let result = forward_query(&make_query(), &upstream, Duration::from_millis(100)).await; assert!(result.is_err()); } + + #[test] + fn parse_addr_ip_only() { + let addr = parse_upstream_addr("1.2.3.4", 53).unwrap(); + assert_eq!(addr, "1.2.3.4:53".parse::().unwrap()); + } + + #[test] + fn parse_addr_ip_port() { + let addr = parse_upstream_addr("1.2.3.4:5353", 53).unwrap(); + assert_eq!(addr, "1.2.3.4:5353".parse::().unwrap()); + } + + #[test] + fn parse_addr_ipv6_bracketed() { + let addr = parse_upstream_addr("[::1]:5553", 53).unwrap(); + assert_eq!(addr, "[::1]:5553".parse::().unwrap()); + } + + #[test] + fn parse_addr_ipv6_bare() { + let addr = parse_upstream_addr("::1", 53).unwrap(); + assert_eq!(addr, "[::1]:53".parse::().unwrap()); + } + + #[test] + fn pool_label_single() { + let pool = UpstreamPool::new(vec![Upstream::Udp("1.2.3.4:53".parse().unwrap())], vec![]); + assert_eq!(pool.label(), "1.2.3.4:53"); + } + + #[test] + fn pool_label_multi() { + let pool = UpstreamPool::new( + vec![Upstream::Udp("1.2.3.4:53".parse().unwrap())], + vec![Upstream::Udp("8.8.8.8:53".parse().unwrap())], + ); + assert_eq!(pool.label(), "1.2.3.4:53 (+1 more)"); + } + + #[tokio::test] + async fn failover_tries_next_on_failure() { + // First upstream is unreachable, second responds + let query = make_query(); + let response_bytes = to_wire(&make_response(&query)); + + let app = axum::Router::new().route( + "/dns-query", + axum::routing::post(move || { + let body = response_bytes.clone(); + async move { + ( + [(axum::http::header::CONTENT_TYPE, "application/dns-message")], + body, + ) + } + }), + ); + + let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let good_addr = listener.local_addr().unwrap(); + tokio::spawn(axum::serve(listener, app).into_future()); + + // Unreachable UDP upstream + working DoH upstream + let pool = UpstreamPool::new( + vec![ + Upstream::Udp("127.0.0.1:1".parse().unwrap()), // will fail + Upstream::Doh { + url: format!("http://{}/dns-query", good_addr), + client: reqwest::Client::new(), + }, + ], + vec![], + ); + + let srtt = RwLock::new(SrttCache::new(true)); + let result = forward_with_failover(&query, &pool, &srtt, Duration::from_millis(500)) + .await + .expect("should fail over to second upstream"); + + assert_eq!(result.header.id, 0xABCD); + assert_eq!(result.answers.len(), 1); + } + + #[test] + fn maybe_update_primary_swaps_when_different() { + let mut pool = UpstreamPool::new( + vec![Upstream::Udp("1.2.3.4:53".parse().unwrap())], + vec![Upstream::Udp("8.8.8.8:53".parse().unwrap())], + ); + assert!(pool.maybe_update_primary("5.6.7.8", 53)); + assert_eq!(pool.preferred().unwrap().to_string(), "5.6.7.8:53"); + } + + #[test] + fn maybe_update_primary_noop_when_same() { + let mut pool = + UpstreamPool::new(vec![Upstream::Udp("1.2.3.4:53".parse().unwrap())], vec![]); + assert!(!pool.maybe_update_primary("1.2.3.4", 53)); + } + + #[test] + fn maybe_update_primary_rejects_invalid_addr() { + let mut pool = + UpstreamPool::new(vec![Upstream::Udp("1.2.3.4:53".parse().unwrap())], vec![]); + assert!(!pool.maybe_update_primary("not-an-ip", 53)); + assert_eq!(pool.preferred().unwrap().to_string(), "1.2.3.4:53"); + } } diff --git a/src/main.rs b/src/main.rs index 62acb69..9e2d2f8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,7 +11,7 @@ use numa::buffer::BytePacketBuffer; use numa::cache::DnsCache; use numa::config::{build_zone_map, load_config, ConfigLoad}; use numa::ctx::{handle_query, ServerCtx}; -use numa::forward::Upstream; +use numa::forward::{parse_upstream, Upstream, UpstreamPool}; use numa::override_store::OverrideStore; use numa::query_log::QueryLog; use numa::service_store::ServiceStore; @@ -129,18 +129,18 @@ async fn main() -> numa::Result<()> { let root_hints = numa::recursive::parse_root_hints(&config.upstream.root_hints); - let (resolved_mode, upstream_auto, upstream, upstream_label) = match config.upstream.mode { + let recursive_pool = || { + let dummy = UpstreamPool::new(vec![Upstream::Udp("0.0.0.0:0".parse().unwrap())], vec![]); + (dummy, "recursive (root hints)".to_string()) + }; + + let (resolved_mode, upstream_auto, pool, upstream_label) = match config.upstream.mode { numa::config::UpstreamMode::Auto => { info!("auto mode: probing recursive resolution..."); if numa::recursive::probe_recursive(&root_hints).await { info!("recursive probe succeeded — self-sovereign mode"); - let dummy = Upstream::Udp("0.0.0.0:0".parse().unwrap()); - ( - numa::config::UpstreamMode::Recursive, - false, - dummy, - "recursive (root hints)".to_string(), - ) + let (pool, label) = recursive_pool(); + (numa::config::UpstreamMode::Recursive, false, pool, label) } else { log::warn!("recursive probe failed — falling back to Quad9 DoH"); let client = reqwest::Client::builder() @@ -149,55 +149,45 @@ async fn main() -> numa::Result<()> { .unwrap_or_default(); let url = DOH_FALLBACK.to_string(); let label = url.clone(); - ( - numa::config::UpstreamMode::Forward, - false, - Upstream::Doh { url, client }, - label, - ) + let pool = UpstreamPool::new(vec![Upstream::Doh { url, client }], vec![]); + (numa::config::UpstreamMode::Forward, false, pool, label) } } numa::config::UpstreamMode::Recursive => { - let dummy = Upstream::Udp("0.0.0.0:0".parse().unwrap()); - ( - numa::config::UpstreamMode::Recursive, - false, - dummy, - "recursive (root hints)".to_string(), - ) + let (pool, label) = recursive_pool(); + (numa::config::UpstreamMode::Recursive, false, pool, label) } numa::config::UpstreamMode::Forward => { - let upstream_addr = if config.upstream.address.is_empty() { - system_dns + let addrs = if config.upstream.address.is_empty() { + let detected = system_dns .default_upstream .or_else(numa::system_dns::detect_dhcp_dns) .unwrap_or_else(|| { info!("could not detect system DNS, falling back to Quad9 DoH"); DOH_FALLBACK.to_string() - }) + }); + vec![detected] } else { config.upstream.address.clone() }; - let upstream: Upstream = if upstream_addr.starts_with("https://") { - let client = reqwest::Client::builder() - .use_rustls_tls() - .build() - .unwrap_or_default(); - Upstream::Doh { - url: upstream_addr, - client, - } - } else { - let addr: SocketAddr = - format!("{}:{}", upstream_addr, config.upstream.port).parse()?; - Upstream::Udp(addr) - }; - let label = upstream.to_string(); + let primary: Vec = addrs + .iter() + .map(|s| parse_upstream(s, config.upstream.port)) + .collect::>>()?; + let fallback: Vec = config + .upstream + .fallback + .iter() + .map(|s| parse_upstream(s, config.upstream.port)) + .collect::>>()?; + + let pool = UpstreamPool::new(primary, fallback); + let label = pool.label(); ( numa::config::UpstreamMode::Forward, config.upstream.address.is_empty(), - upstream, + pool, label, ) } @@ -294,7 +284,7 @@ async fn main() -> numa::Result<()> { services: Mutex::new(service_store), lan_peers: Mutex::new(numa::lan::PeerStore::new(config.lan.peer_timeout_secs)), forwarding_rules, - upstream: Mutex::new(upstream), + upstream_pool: Mutex::new(pool), upstream_auto, upstream_port: config.upstream.port, lan_ip: Mutex::new(numa::lan::detect_lan_ip().unwrap_or(std::net::Ipv4Addr::LOCALHOST)), @@ -613,27 +603,17 @@ async fn network_watch_loop(ctx: Arc) { } } - // Re-detect upstream every 30s or on LAN IP change (UDP only — - // DoH upstreams are explicitly configured via URL, not auto-detected) - if ctx.upstream_auto - && matches!(*ctx.upstream.lock().unwrap(), Upstream::Udp(_)) - && (changed || tick.is_multiple_of(6)) - { + // Re-detect upstream every 30s or on LAN IP change (auto-detect only) + if ctx.upstream_auto && (changed || tick.is_multiple_of(6)) { let dns_info = numa::system_dns::discover_system_dns(); let new_addr = dns_info .default_upstream .or_else(numa::system_dns::detect_dhcp_dns) .unwrap_or_else(|| QUAD9_IP.to_string()); - if let Ok(new_sock) = - format!("{}:{}", new_addr, ctx.upstream_port).parse::() - { - let new_upstream = Upstream::Udp(new_sock); - let mut upstream = ctx.upstream.lock().unwrap(); - if *upstream != new_upstream { - info!("upstream changed: {} → {}", upstream, new_upstream); - *upstream = new_upstream; - changed = true; - } + let mut pool = ctx.upstream_pool.lock().unwrap(); + if pool.maybe_update_primary(&new_addr, ctx.upstream_port) { + info!("upstream changed → {}", pool.label()); + changed = true; } } -- 2.34.1 From 777012958917d454a3323f177d272b660ff4972a Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 11 Apr 2026 01:14:04 +0300 Subject: [PATCH 004/139] =?UTF-8?q?feat:=20cache=20warming=20=E2=80=94=20p?= =?UTF-8?q?roactive=20DNS=20resolution=20for=20configured=20domains=20(#78?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves A + AAAA at startup for domains listed in [cache] warm, then re-resolves before TTL expiry (at 75% elapsed). Keeps critical domains always hot in cache with zero client-visible latency. Closes #34 (item 4) Co-authored-by: Claude Opus 4.6 --- src/cache.rs | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/config.rs | 3 ++ src/main.rs | 62 +++++++++++++++++++++++++++++++++++++ 3 files changed, 150 insertions(+) diff --git a/src/cache.rs b/src/cache.rs index d9a2a76..5bdde85 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -82,6 +82,29 @@ impl DnsCache { Some((packet, entry.dnssec_status)) } + pub fn ttl_remaining(&self, domain: &str, qtype: QueryType) -> Option<(u32, u32)> { + let type_map = self.entries.get(domain)?; + let entry = type_map.get(&qtype)?; + let elapsed = entry.inserted_at.elapsed(); + if elapsed >= entry.ttl { + return None; + } + let total = entry.ttl.as_secs() as u32; + let remaining = (entry.ttl - elapsed).as_secs() as u32; + Some((remaining, total)) + } + + pub fn needs_warm(&self, domain: &str) -> bool { + for qtype in [QueryType::A, QueryType::AAAA] { + match self.ttl_remaining(domain, qtype) { + None => return true, + Some((remaining, total)) if remaining < total / 4 => return true, + _ => {} + } + } + false + } + pub fn insert(&mut self, domain: &str, qtype: QueryType, packet: &DnsPacket) { self.insert_with_status(domain, qtype, packet, DnssecStatus::Indeterminate); } @@ -233,4 +256,66 @@ mod tests { cache.insert("example.com", QueryType::A, &pkt); assert!(cache.heap_bytes() > empty); } + + #[test] + fn ttl_remaining_returns_values_for_fresh_entry() { + let mut cache = DnsCache::new(100, 60, 3600); + let mut pkt = DnsPacket::new(); + pkt.answers.push(DnsRecord::A { + domain: "example.com".into(), + addr: "1.2.3.4".parse().unwrap(), + ttl: 300, + }); + cache.insert("example.com", QueryType::A, &pkt); + let (remaining, total) = cache.ttl_remaining("example.com", QueryType::A).unwrap(); + assert_eq!(total, 300); + assert!(remaining <= 300); + assert!(remaining > 0); + } + + #[test] + fn ttl_remaining_none_for_missing() { + let cache = DnsCache::new(100, 1, 3600); + assert!(cache.ttl_remaining("missing.com", QueryType::A).is_none()); + } + + #[test] + fn needs_warm_true_when_missing() { + let cache = DnsCache::new(100, 1, 3600); + assert!(cache.needs_warm("missing.com")); + } + + #[test] + fn needs_warm_false_when_fresh() { + let mut cache = DnsCache::new(100, 1, 3600); + let mut pkt_a = DnsPacket::new(); + pkt_a.answers.push(DnsRecord::A { + domain: "example.com".into(), + addr: "1.2.3.4".parse().unwrap(), + ttl: 300, + }); + let mut pkt_aaaa = DnsPacket::new(); + pkt_aaaa.answers.push(DnsRecord::AAAA { + domain: "example.com".into(), + addr: "::1".parse().unwrap(), + ttl: 300, + }); + cache.insert("example.com", QueryType::A, &pkt_a); + cache.insert("example.com", QueryType::AAAA, &pkt_aaaa); + assert!(!cache.needs_warm("example.com")); + } + + #[test] + fn needs_warm_true_when_only_a_cached() { + let mut cache = DnsCache::new(100, 1, 3600); + let mut pkt = DnsPacket::new(); + pkt.answers.push(DnsRecord::A { + domain: "example.com".into(), + addr: "1.2.3.4".parse().unwrap(), + ttl: 300, + }); + cache.insert("example.com", QueryType::A, &pkt); + // AAAA missing → needs warm + assert!(cache.needs_warm("example.com")); + } } diff --git a/src/config.rs b/src/config.rs index fa794d7..708ed4f 100644 --- a/src/config.rs +++ b/src/config.rs @@ -247,6 +247,8 @@ pub struct CacheConfig { pub min_ttl: u32, #[serde(default = "default_max_ttl")] pub max_ttl: u32, + #[serde(default)] + pub warm: Vec, } impl Default for CacheConfig { @@ -255,6 +257,7 @@ impl Default for CacheConfig { max_entries: default_max_entries(), min_ttl: default_min_ttl(), max_ttl: default_max_ttl(), + warm: Vec::new(), } } } diff --git a/src/main.rs b/src/main.rs index 9e2d2f8..cee680a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -402,6 +402,9 @@ async fn main() -> numa::Result<()> { g, &format!("max {} entries", config.cache.max_entries), ); + if !config.cache.warm.is_empty() { + row("Warm", g, &format!("{} domains", config.cache.warm.len())); + } row( "Blocking", g, @@ -484,6 +487,15 @@ async fn main() -> numa::Result<()> { }); } + // Spawn cache warming for user-configured domains + if !config.cache.warm.is_empty() { + let warm_ctx = Arc::clone(&ctx); + let warm_domains = config.cache.warm.clone(); + tokio::spawn(async move { + cache_warm_loop(warm_ctx, warm_domains).await; + }); + } + // Spawn HTTP API server let api_ctx = Arc::clone(&ctx); let api_addr: SocketAddr = format!("{}:{}", config.server.api_bind_addr, api_port).parse()?; @@ -720,3 +732,53 @@ async fn load_blocklists(ctx: &ServerCtx, lists: &[String]) { downloaded.len() ); } + +async fn warm_domain(ctx: &ServerCtx, domain: &str) { + use numa::question::QueryType; + + for qtype in [QueryType::A, QueryType::AAAA] { + let query = numa::packet::DnsPacket::query(0, domain, qtype); + let result = if ctx.upstream_mode == numa::config::UpstreamMode::Recursive { + numa::recursive::resolve_recursive( + domain, + qtype, + &ctx.cache, + &query, + &ctx.root_hints, + &ctx.srtt, + ) + .await + } else { + let pool = ctx.upstream_pool.lock().unwrap().clone(); + numa::forward::forward_with_failover(&query, &pool, &ctx.srtt, ctx.timeout).await + }; + match result { + Ok(resp) => { + ctx.cache.write().unwrap().insert(domain, qtype, &resp); + log::debug!("cache warm: {} {:?}", domain, qtype); + } + Err(e) => log::warn!("cache warm: {} {:?} failed: {}", domain, qtype, e), + } + } +} + +async fn cache_warm_loop(ctx: Arc, domains: Vec) { + tokio::time::sleep(Duration::from_secs(2)).await; + + for domain in &domains { + warm_domain(&ctx, domain).await; + } + info!("cache warm: {} domains resolved at startup", domains.len()); + + let mut interval = tokio::time::interval(Duration::from_secs(30)); + interval.tick().await; + loop { + interval.tick().await; + for domain in &domains { + let refresh = ctx.cache.read().unwrap().needs_warm(domain); + if refresh { + warm_domain(&ctx, domain).await; + } + } + } +} -- 2.34.1 From 7d6b0ed568e6c8758e93c1ded1cb420683f8e7f5 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 11 Apr 2026 04:06:17 +0300 Subject: [PATCH 005/139] feat: DoH server endpoint + DoT enabled by default (#79) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: document multi-forwarder and cache warming in config and README Co-Authored-By: Claude Opus 4.6 * feat: DNS-over-HTTPS server endpoint (RFC 8484) Serve DoH at POST /dns-query on the existing HTTPS proxy (port 443). Automatically enabled when proxy TLS is active — no config needed. Also fix zone map priority so local zones override RFC 6762 .local special-use handling. Co-Authored-By: Claude Opus 4.6 (1M context) * style: cargo fmt Co-Authored-By: Claude Opus 4.6 (1M context) * chore: remove GoatCounter analytics from site GoatCounter domains (goatcounter.com, gc.zgo.at) are blocked by Hagezi Pro, which is Numa's default blocklist. A DNS privacy tool should not embed analytics that its own resolver blocks. Co-Authored-By: Claude Opus 4.6 * feat: enable DoT listener by default DoT now starts automatically with `sudo numa`, matching the proxy and DoH which are already on by default. The self-signed CA infrastructure is shared with the proxy, so there is no additional setup. This makes `numa setup-phone` work out of the box. Co-Authored-By: Claude Opus 4.6 (1M context) --------- Co-authored-by: Claude Opus 4.6 --- README.md | 2 + numa.toml | 10 ++- site/blog-template.html | 2 - site/blog/index.html | 2 - site/index.html | 2 - src/config.rs | 7 +- src/ctx.rs | 8 +- src/doh.rs | 188 ++++++++++++++++++++++++++++++++++++++++ src/health.rs | 4 + src/lib.rs | 1 + src/main.rs | 9 ++ src/proxy.rs | 36 ++++++-- tests/integration.sh | 48 ++++++++++ 13 files changed, 298 insertions(+), 21 deletions(-) create mode 100644 src/doh.rs diff --git a/README.md b/README.md index 69ecd80..44b8aa4 100644 --- a/README.md +++ b/README.md @@ -139,6 +139,8 @@ From Machine B: `curl http://api.numa` → proxied to Machine A's port 8000. Ena - [x] DNS-over-TLS listener — encrypted client connections (RFC 7858, ALPN strict) - [x] Recursive resolution + DNSSEC — chain-of-trust, NSEC/NSEC3 - [x] SRTT-based nameserver selection +- [x] Multi-forwarder failover — multiple upstreams with SRTT ranking, fallback pool +- [x] Cache warming — proactive resolution for configured domains - [x] Mobile onboarding — `setup-phone` QR flow, mobile API, mobileconfig profiles - [ ] pkarr integration — self-sovereign DNS via Mainline DHT - [ ] Global `.numa` names — DHT-backed, no registrar diff --git a/numa.toml b/numa.toml index 4389fdb..92b5411 100644 --- a/numa.toml +++ b/numa.toml @@ -12,10 +12,11 @@ api_port = 5380 # [upstream] # mode = "forward" # "forward" (default) — relay to upstream # # "recursive" — resolve from root hints (no address needed) +# address = "9.9.9.9" # single upstream (plain UDP) +# address = ["192.168.1.1", "9.9.9.9:5353"] # multiple upstreams — SRTT picks fastest # address = "https://dns.quad9.net/dns-query" # DNS-over-HTTPS (encrypted) -# address = "https://cloudflare-dns.com/dns-query" # Cloudflare DoH -# address = "9.9.9.9" # plain UDP -# port = 53 # only for forward mode, plain UDP +# fallback = ["8.8.8.8", "1.1.1.1"] # tried only when all primaries fail +# port = 53 # default port for addresses without :port # timeout_ms = 3000 # root_hints = [ # only used in recursive mode # "198.41.0.4", # a.root-servers.net (Verisign) @@ -54,6 +55,7 @@ api_port = 5380 max_entries = 10000 min_ttl = 60 max_ttl = 86400 +# warm = ["google.com", "github.com"] # resolve at startup, refresh before TTL expiry [proxy] enabled = true @@ -91,7 +93,7 @@ tld = "numa" # DNS-over-TLS listener (RFC 7858) — encrypted DNS on port 853 # [dot] -# enabled = false # opt-in: accept DoT queries +# enabled = true # on by default; set false to disable # port = 853 # standard DoT port # bind_addr = "0.0.0.0" # IPv4 or IPv6; unspecified binds all interfaces # cert_path = "/etc/numa/dot.crt" # PEM cert; omit to use self-signed (proxy CA if available) diff --git a/site/blog-template.html b/site/blog-template.html index 85e854b..54f0eae 100644 --- a/site/blog-template.html +++ b/site/blog-template.html @@ -298,7 +298,5 @@ $body$ Blog - diff --git a/site/blog/index.html b/site/blog/index.html index 10d62a7..993c166 100644 --- a/site/blog/index.html +++ b/site/blog/index.html @@ -197,7 +197,5 @@ body::before { Home - diff --git a/site/index.html b/site/index.html index 27ea8fb..0231e0a 100644 --- a/site/index.html +++ b/site/index.html @@ -1769,7 +1769,5 @@ const observer = new IntersectionObserver((entries) => { document.querySelectorAll('.reveal').forEach(el => observer.observe(el)); - diff --git a/src/config.rs b/src/config.rs index 708ed4f..6480883 100644 --- a/src/config.rs +++ b/src/config.rs @@ -411,7 +411,7 @@ pub struct DnssecConfig { #[derive(Deserialize, Clone)] pub struct DotConfig { - #[serde(default)] + #[serde(default = "default_dot_enabled")] pub enabled: bool, #[serde(default = "default_dot_port")] pub port: u16, @@ -428,7 +428,7 @@ pub struct DotConfig { impl Default for DotConfig { fn default() -> Self { DotConfig { - enabled: false, + enabled: default_dot_enabled(), port: default_dot_port(), bind_addr: default_dot_bind_addr(), cert_path: None, @@ -437,6 +437,9 @@ impl Default for DotConfig { } } +fn default_dot_enabled() -> bool { + true +} fn default_dot_port() -> u16 { 853 } diff --git a/src/ctx.rs b/src/ctx.rs index b4e0777..3ef6a0a 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -110,6 +110,10 @@ pub async fn resolve_query( 300, )); (resp, QueryPath::Local, DnssecStatus::Indeterminate) + } else if let Some(records) = ctx.zone_map.get(qname.as_str()).and_then(|m| m.get(&qtype)) { + let mut resp = DnsPacket::response_from(&query, ResultCode::NOERROR); + resp.answers = records.clone(); + (resp, QueryPath::Local, DnssecStatus::Indeterminate) } else if is_special_use_domain(&qname) { // RFC 6761/8880: private PTR, DDR, NAT64 — answer locally let resp = special_use_response(&query, &qname, qtype); @@ -158,10 +162,6 @@ pub async fn resolve_query( 60, )); (resp, QueryPath::Blocked, DnssecStatus::Indeterminate) - } else if let Some(records) = ctx.zone_map.get(qname.as_str()).and_then(|m| m.get(&qtype)) { - let mut resp = DnsPacket::response_from(&query, ResultCode::NOERROR); - resp.answers = records.clone(); - (resp, QueryPath::Local, DnssecStatus::Indeterminate) } else { let cached = ctx.cache.read().unwrap().lookup_with_status(&qname, qtype); if let Some((cached, cached_dnssec)) = cached { diff --git a/src/doh.rs b/src/doh.rs new file mode 100644 index 0000000..cf50b31 --- /dev/null +++ b/src/doh.rs @@ -0,0 +1,188 @@ +use std::net::SocketAddr; + +use axum::body::Bytes; +use axum::extract::{Request, State}; +use axum::response::{IntoResponse, Response}; +use hyper::StatusCode; +use log::warn; + +use crate::buffer::BytePacketBuffer; +use crate::ctx::{resolve_query, ServerCtx}; +use crate::header::ResultCode; +use crate::packet::DnsPacket; + +const MAX_DNS_MSG: usize = 4096; +const DOH_CONTENT_TYPE: &str = "application/dns-message"; + +pub async fn doh_post(State(state): State, req: Request) -> Response { + let host = super::proxy::extract_host(&req); + if !is_doh_host(host.as_deref(), &state.ctx.proxy_tld) { + return StatusCode::NOT_FOUND.into_response(); + } + + let content_type = req + .headers() + .get(hyper::header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + if !content_type.starts_with(DOH_CONTENT_TYPE) { + return StatusCode::UNSUPPORTED_MEDIA_TYPE.into_response(); + } + + let body = match axum::body::to_bytes(req.into_body(), MAX_DNS_MSG).await { + Ok(b) => b, + Err(_) => { + return (StatusCode::PAYLOAD_TOO_LARGE, "body exceeds 4096 bytes").into_response() + } + }; + + if body.is_empty() { + return (StatusCode::BAD_REQUEST, "empty body").into_response(); + } + + let src = state + .remote_addr + .unwrap_or_else(|| SocketAddr::from(([127, 0, 0, 1], 0))); + + resolve_doh(&body, src, &state.ctx).await +} + +fn is_doh_host(host: Option<&str>, tld: &str) -> bool { + match host { + Some(h) if h == tld => true, + Some(h) => { + h.len() == 2 * tld.len() + 1 + && h.starts_with(tld) + && h.as_bytes().get(tld.len()) == Some(&b'.') + && h.ends_with(tld) + } + None => false, + } +} + +async fn resolve_doh(dns_bytes: &[u8], src: SocketAddr, ctx: &ServerCtx) -> Response { + let mut buffer = BytePacketBuffer::from_bytes(dns_bytes); + let query = match DnsPacket::from_buffer(&mut buffer) { + Ok(q) => q, + Err(e) => { + warn!("DoH: parse error from {}: {}", src, e); + let query_id = u16::from_be_bytes([ + dns_bytes.first().copied().unwrap_or(0), + dns_bytes.get(1).copied().unwrap_or(0), + ]); + let mut resp = DnsPacket::new(); + resp.header.id = query_id; + resp.header.response = true; + resp.header.rescode = ResultCode::FORMERR; + return serialize_response(&resp); + } + }; + + let query_id = query.header.id; + let query_rd = query.header.recursion_desired; + let questions = query.questions.clone(); + + match resolve_query(query, src, ctx).await { + Ok(resp_buffer) => { + let min_ttl = extract_min_ttl(resp_buffer.filled()); + dns_response(resp_buffer.filled(), min_ttl) + } + Err(e) => { + warn!("DoH: resolve error for {}: {}", src, e); + let mut resp = DnsPacket::new(); + resp.header.id = query_id; + resp.header.response = true; + resp.header.recursion_desired = query_rd; + resp.header.recursion_available = true; + resp.header.rescode = ResultCode::SERVFAIL; + resp.questions = questions; + serialize_response(&resp) + } + } +} + +fn extract_min_ttl(wire: &[u8]) -> u32 { + let mut buf = BytePacketBuffer::from_bytes(wire); + match DnsPacket::from_buffer(&mut buf) { + Ok(pkt) => pkt.answers.iter().map(|r| r.ttl()).min().unwrap_or(0), + Err(_) => 0, + } +} + +fn dns_response(wire: &[u8], min_ttl: u32) -> Response { + ( + StatusCode::OK, + [ + (hyper::header::CONTENT_TYPE, DOH_CONTENT_TYPE), + ( + hyper::header::CACHE_CONTROL, + &format!("max-age={}", min_ttl), + ), + ], + Bytes::copy_from_slice(wire), + ) + .into_response() +} + +fn serialize_response(pkt: &DnsPacket) -> Response { + let mut buf = BytePacketBuffer::new(); + match pkt.write(&mut buf) { + Ok(_) => dns_response(buf.filled(), 0), + Err(_) => StatusCode::INTERNAL_SERVER_ERROR.into_response(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::buffer::BytePacketBuffer; + use crate::header::ResultCode; + use crate::packet::DnsPacket; + use crate::record::DnsRecord; + + #[test] + fn is_doh_host_matches_tld() { + assert!(is_doh_host(Some("numa"), "numa")); + assert!(is_doh_host(Some("numa.numa"), "numa")); + assert!(!is_doh_host(Some("foo.numa"), "numa")); + assert!(!is_doh_host(None, "numa")); + } + + #[test] + fn extract_min_ttl_from_response() { + let mut pkt = DnsPacket::new(); + pkt.header.response = true; + pkt.answers.push(DnsRecord::A { + domain: "example.com".to_string(), + addr: std::net::Ipv4Addr::new(1, 2, 3, 4), + ttl: 300, + }); + pkt.answers.push(DnsRecord::A { + domain: "example.com".to_string(), + addr: std::net::Ipv4Addr::new(5, 6, 7, 8), + ttl: 60, + }); + let mut buf = BytePacketBuffer::new(); + pkt.write(&mut buf).unwrap(); + assert_eq!(extract_min_ttl(buf.filled()), 60); + } + + #[test] + fn extract_min_ttl_no_answers() { + let mut pkt = DnsPacket::new(); + pkt.header.response = true; + let mut buf = BytePacketBuffer::new(); + pkt.write(&mut buf).unwrap(); + assert_eq!(extract_min_ttl(buf.filled()), 0); + } + + #[test] + fn serialize_formerr_response() { + let mut pkt = DnsPacket::new(); + pkt.header.id = 0xABCD; + pkt.header.response = true; + pkt.header.rescode = ResultCode::FORMERR; + let resp = serialize_response(&pkt); + assert_eq!(resp.status(), StatusCode::OK); + } +} diff --git a/src/health.rs b/src/health.rs index b2359c4..e55c569 100644 --- a/src/health.rs +++ b/src/health.rs @@ -73,11 +73,15 @@ impl HealthMeta { recursive_enabled: bool, mdns_enabled: bool, blocking_enabled: bool, + doh_enabled: bool, ) -> Self { let ca_path = data_dir.join("ca.pem"); let ca_fingerprint_sha256 = compute_ca_fingerprint(&ca_path); let mut features = Vec::new(); + if doh_enabled { + features.push("doh".to_string()); + } if dot_enabled { features.push("dot".to_string()); } diff --git a/src/lib.rs b/src/lib.rs index 066c7ca..be71125 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,7 @@ pub mod cache; pub mod config; pub mod ctx; pub mod dnssec; +pub mod doh; pub mod dot; pub mod forward; pub mod header; diff --git a/src/main.rs b/src/main.rs index cee680a..903be9a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -243,6 +243,7 @@ async fn main() -> numa::Result<()> { None }; + let doh_enabled = initial_tls.is_some(); let health_meta = numa::health::HealthMeta::build( &resolved_data_dir, config.dot.enabled, @@ -252,6 +253,7 @@ async fn main() -> numa::Result<()> { resolved_mode == numa::config::UpstreamMode::Recursive, config.lan.enabled, config.blocking.enabled, + doh_enabled, ); let ca_pem = std::fs::read_to_string(resolved_data_dir.join("ca.pem")).ok(); @@ -431,6 +433,13 @@ async fn main() -> numa::Result<()> { if config.dot.enabled { row("DoT", g, &format!("tls://:{}", config.dot.port)); } + if doh_enabled { + row( + "DoH", + g, + &format!("https://:{}/dns-query", config.proxy.tls_port), + ); + } if config.lan.enabled { row("LAN", g, "mDNS (_numa._tcp.local)"); } diff --git a/src/proxy.rs b/src/proxy.rs index 244e597..b158d9b 100644 --- a/src/proxy.rs +++ b/src/proxy.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use axum::body::Body; use axum::extract::{Request, State}; use axum::response::IntoResponse; -use axum::routing::any; +use axum::routing::{any, post}; use axum::Router; use http_body_util::BodyExt; use hyper::StatusCode; @@ -18,6 +18,14 @@ use crate::ctx::ServerCtx; type HttpClient = Client; +/// State passed to the DoH handler. Includes the remote address so +/// `resolve_query` can log the client IP. +#[derive(Clone)] +pub struct DohState { + pub ctx: Arc, + pub remote_addr: Option, +} + #[derive(Clone)] struct ProxyState { ctx: Arc, @@ -74,9 +82,17 @@ pub async fn start_proxy_tls(ctx: Arc, port: u16, bind_addr: Ipv4Addr // Hold a separate Arc so we can access tls_config after ctx moves into ProxyState let tls_holder = Arc::clone(&ctx); - let state = ProxyState { ctx, client }; + let proxy_state = ProxyState { + ctx: Arc::clone(&ctx), + client, + }; - let app = Router::new().fallback(any(proxy_handler)).with_state(state); + // DoH route (RFC 8484) served only on the TLS listener. + // DohState.remote_addr is set per-connection below. + let doh_state = DohState { + ctx, + remote_addr: None, + }; loop { let (tcp_stream, remote_addr) = match listener.accept().await { @@ -91,7 +107,17 @@ pub async fn start_proxy_tls(ctx: Arc, port: u16, bind_addr: Ipv4Addr // unwrap safe: guarded by is_none() check above let acceptor = TlsAcceptor::from(Arc::clone(&*tls_holder.tls_config.as_ref().unwrap().load())); - let app = app.clone(); + + let mut conn_doh_state = doh_state.clone(); + conn_doh_state.remote_addr = Some(remote_addr); + + let app = Router::new() + .route( + "/dns-query", + post(crate::doh::doh_post).with_state(conn_doh_state), + ) + .fallback(any(proxy_handler)) + .with_state(proxy_state.clone()); tokio::spawn(async move { let tls_stream = match acceptor.accept(tcp_stream).await { @@ -232,7 +258,7 @@ pre .str {{ color: #d48a5a }} ) } -fn extract_host(req: &Request) -> Option { +pub fn extract_host(req: &Request) -> Option { req.headers() .get(hyper::header::HOST) .and_then(|v| v.to_str().ok()) diff --git a/tests/integration.sh b/tests/integration.sh index 473356e..92da878 100755 --- a/tests/integration.sh +++ b/tests/integration.sh @@ -622,6 +622,54 @@ CONF "10.0.0.1" \ "$($KDIG +short dot-test.example A 2>/dev/null)" + echo "" + echo "=== DNS-over-HTTPS (RFC 8484) ===" + + DOH_QUERY_FILE=/tmp/numa-doh-query.bin + DOH_RESP_FILE=/tmp/numa-doh-resp.bin + + # Build DNS wire-format query for dot-test.example A + printf '\x00\x01\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x08dot-test\x07example\x00\x00\x01\x00\x01' > "$DOH_QUERY_FILE" + + # POST valid DoH query + DOH_CODE=$(curl -sk -X POST \ + --resolve "numa.numa:$PROXY_HTTPS_PORT:127.0.0.1" \ + -H "Content-Type: application/dns-message" \ + --data-binary @"$DOH_QUERY_FILE" \ + --cacert "$CA" \ + -o "$DOH_RESP_FILE" \ + -w "%{http_code}" \ + "https://numa.numa:$PROXY_HTTPS_PORT/dns-query") + check "DoH POST returns HTTP 200" "200" "$DOH_CODE" + + # Check response contains IP 10.0.0.1 (hex: 0a000001) + DOH_HEX=$(xxd -p "$DOH_RESP_FILE" | tr -d '\n') + if echo "$DOH_HEX" | grep -q "0a000001"; then + check "DoH response resolves dot-test.example → 10.0.0.1" "found" "found" + else + check "DoH response resolves dot-test.example → 10.0.0.1" "0a000001" "$DOH_HEX" + fi + + # Wrong Content-Type → 415 + DOH_CT_CODE=$(curl -sk -X POST \ + -H "Host: numa.numa" \ + -H "Content-Type: text/plain" \ + --data-binary @"$DOH_QUERY_FILE" \ + -o /dev/null -w "%{http_code}" \ + "https://127.0.0.1:$PROXY_HTTPS_PORT/dns-query") + check "DoH wrong Content-Type → 415" "415" "$DOH_CT_CODE" + + # Wrong host → 404 (DoH only serves numa.numa) + DOH_HOST_CODE=$(curl -sk -X POST \ + -H "Host: foo.numa" \ + -H "Content-Type: application/dns-message" \ + --data-binary @"$DOH_QUERY_FILE" \ + -o /dev/null -w "%{http_code}" \ + "https://127.0.0.1:$PROXY_HTTPS_PORT/dns-query") + check "DoH wrong host → 404" "404" "$DOH_HOST_CODE" + + rm -f "$DOH_QUERY_FILE" "$DOH_RESP_FILE" + echo "" echo "=== Proxy TLS works with DoT enabled ===" -- 2.34.1 From 156b68de87c31e9f877289ad9582a46087fd8435 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 11 Apr 2026 04:17:46 +0300 Subject: [PATCH 006/139] fix: replace unscannable QR art with placeholder in blog post (#80) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Unicode block-character QR code in the DoT blog post can't be scanned by phone cameras due to HTML font metrics distorting the grid. Replace with a bordered placeholder box — the dashboard screenshot already shows a working QR. Co-authored-by: Claude Opus 4.6 (1M context) --- blog/dot-from-scratch.md | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/blog/dot-from-scratch.md b/blog/dot-from-scratch.md index b4bb70b..1775943 100644 --- a/blog/dot-from-scratch.md +++ b/blog/dot-from-scratch.md @@ -134,24 +134,12 @@ $ numa setup-phone Profile URL: http://192.168.1.10:8765/mobileconfig - █████████████████████████████████████ - █████████████████████████████████████ - ████ ▄▄▄▄▄ ██ ▀█ ▀▀▀▄▀ ▀▀█ ▄▄▄▄▄ ████ - ████ █ █ █ ▄▀ ▄█▀▄▀█▄▀█ █ █ ████ - ████ █▄▄▄█ █ ▀▄▄ ▀ █▄▀▀█▀█ █▄▄▄█ ████ - ████▄▄▄▄▄▄▄█ ▀▄▀▄█▄█ █▄█▄█▄▄▄▄▄▄▄████ - ████ ▀▄▄▄▄▄█▀ ▀██▄ ▄ ▄▀█▀█ ▄ ▄▄█▀████ - █████▄▄▀▄▀▄▄█▄ ▀████▀▄▄▀█▀▀▄ ██▀█████ - ████▄██▄ ▀▄ █ █ █▀█▄▄██ ▄▄▀▄▀▄ █▀████ - █████ ▀ ▄▀ ▄▀▄ ▄▄▀ ██ ▄▀██▄▀█████ - ████ ▀▀ █▄█▄▀ ▄ █▄ ▄█▀▄ ▀█▀▀ █▀████ - ████ ██▀█ ▄▄▀█▄▄██▀▄▀ ▀█▄▀ █▀▄▄▀█████ - ████▄█▄▄▄▄▄█▀▄█▄█▀▀ ▀██▀ ▄▄▄ ▀ ████ - ████ ▄▄▄▄▄ █▀▀▀▀ ▄█▀ ▀▄ █▄█ ▄▄▀█████ - ████ █ █ █ ▄ ██▀▄ ▄▄██ ▄ ▄▄▄██████ - ████ █▄▄▄█ █▄ ▄▀▀▄▄█▀▄▀▄ ▀▄▀ ▄█ █████ - ████▄▄▄▄▄▄▄█▄▄█▄▄▄█▄█▄▄██████▄▄██████ - █████████████████████████████████████ + ██████████████████████████████ + ██ ██ + ██ [QR code rendered in ██ + ██ your terminal] ██ + ██ ██ + ██████████████████████████████ On your iPhone: 1. Open Camera, point at the QR code, tap the yellow banner -- 2.34.1 From 2de1bc2efc53e5179692528c1a1c034427e15df3 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 11 Apr 2026 12:15:40 +0300 Subject: [PATCH 007/139] chore: bump version to 0.12.0 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f64e765..86f96da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1144,7 +1144,7 @@ dependencies = [ [[package]] name = "numa" -version = "0.11.0" +version = "0.12.0" dependencies = [ "arc-swap", "axum", diff --git a/Cargo.toml b/Cargo.toml index 95e094b..aa67dd4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "numa" -version = "0.11.0" +version = "0.12.0" authors = ["razvandimescu "] edition = "2021" description = "Portable DNS resolver in Rust — .numa local domains, ad blocking, developer overrides, DNS-over-HTTPS" -- 2.34.1 From fb4cbe0b2a60799e30df2582f7cba1b072217d7a Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 11 Apr 2026 14:08:09 +0300 Subject: [PATCH 008/139] =?UTF-8?q?chore:=20update=20DoT=20blog=20post=20?= =?UTF-8?q?=E2=80=94=20mark=20DoH=20server=20as=20shipped=20in=20v0.12.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 (1M context) --- blog/dot-from-scratch.md | 2 +- site/blog/posts/dot-from-scratch.html | 553 ++++++++++++++++++++++++++ 2 files changed, 554 insertions(+), 1 deletion(-) create mode 100644 site/blog/posts/dot-from-scratch.html diff --git a/blog/dot-from-scratch.md b/blog/dot-from-scratch.md index 1775943..448f185 100644 --- a/blog/dot-from-scratch.md +++ b/blog/dot-from-scratch.md @@ -169,7 +169,7 @@ I've been dogfooding this since v0.10 shipped in early April. The phone resolves ## What's next -- **DoH server** — Numa already has a DoH client; the other half unlocks Firefox's built-in DoH setting pointing at Numa. +- ~~**DoH server**~~ — shipped in v0.12.0. `POST /dns-query` accepts [RFC 8484](https://datatracker.ietf.org/doc/html/rfc8484) wire-format queries, so Firefox/Chrome can point their built-in DoH at Numa. - **DoQ server (RFC 9250)** — DNS over QUIC. Android 14+ supports it natively. - **DDR (RFC 9462)** — auto-discovery via `_dns.resolver.arpa IN SVCB`, so phones pick up a moved Numa instance without the installed profile going stale. diff --git a/site/blog/posts/dot-from-scratch.html b/site/blog/posts/dot-from-scratch.html new file mode 100644 index 0000000..a620f3b --- /dev/null +++ b/site/blog/posts/dot-from-scratch.html @@ -0,0 +1,553 @@ + + + + + +DNS-over-TLS from Scratch in Rust — Numa + + + + + + + + +
+
+

DNS-over-TLS from Scratch in Rust

+ +
+ +

The previous post +ended with “DoT — the last encrypted transport we don’t support.” This +post is about building it.

+

Numa now runs a DoT listener on port 853. My iPhone uses it as its +system resolver, so ad blocking, DNSSEC validation, and recursive +resolution follow my phone through the day. No cloud, no account, no +companion app — a self-signed cert, a .mobileconfig +profile, and a QR code in the terminal.

+

RFC 7858 is ten pages. The hard parts weren’t in the RFC. They were +in cross-protocol confusion defenses, a crypto-provider init gotcha that +only triggered in one specific config combination, and a certificate SAN +bug iOS was happy to accept and kdig immediately rejected. +This post is about those parts.

+

Why DoT when you already have +DoH?

+

Numa has shipped DoH since v0.1. Both protocols tunnel DNS over TLS; +DoH wraps queries in HTTP/2, DoT is DNS-over-TCP with TLS in front. Same +privacy guarantees, different wrapper.

+

The answer to “why both” is that phones ask for DoT by +name. iOS system DNS configures it with two fields (IP + server +name) instead of a URL template. Android 9+ “Private DNS” speaks DoT +natively. Linux stubs default to DoT. I wanted my phone on Numa without +installing anything on the phone itself, and DoT is the protocol iOS and +Android already speak for that.

+

The wire format is +refreshingly small

+

RFC 7858 is one sentence of wire protocol: DNS-over-TCP (RFC 1035 +§4.2.2) with TLS in front, on port 853. DNS-over-TCP has existed +since 1987 — a 2-byte length prefix followed by the DNS message. DoT is +that, wrapped in a TLS session. The entire framing code is seven +lines:

+
async fn write_framed<S>(stream: &mut S, msg: &[u8]) -> io::Result<()>
+where S: AsyncWriteExt + Unpin {
+    let mut out = Vec::with_capacity(2 + msg.len());
+    out.extend_from_slice(&(msg.len() as u16).to_be_bytes());
+    out.extend_from_slice(msg);
+    stream.write_all(&out).await?;
+    stream.flush().await
+}
+

Reads are symmetric: read_exact two bytes, convert to +u16, read_exact that many bytes. No HTTP +headers, no chunked encoding, no framing layer.

+

Persistent connections

+

A fresh TCP+TLS handshake is at least 3 RTTs — about 300ms on a 100ms +connection, 60× the cost of a UDP query. RFC 7858 §3.4 says clients +SHOULD reuse the TCP connection for multiple queries, and every real DoT +client does: iOS, Android, systemd, stubby. A single connection often +carries hundreds of queries.

+

Timing diagram comparing a DNS lookup over plain UDP (1 RTT), over DoT on a fresh connection (3 RTTs — TCP handshake, TLS 1.3 handshake, then the query), and over a reused DoT session (1 RTT, same as UDP).

+

The amortization point is the whole game. If you only ever do one +query per connection, DoT is roughly 3× slower than UDP and you should +not use it. If you reuse the same TLS session for a browsing session’s +worth of queries, the handshake is paid once and every subsequent query +is effectively free.

+

The server is a loop that reads a length-prefixed message, resolves +it, writes the response framed the same way, waits for the next one. +Three timeouts keep it honest:

+
    +
  • Handshake timeout (10s) — a slowloris that opens +TCP but never sends a ClientHello can’t pin a worker.
  • +
  • Idle timeout (30s) — a connected client with +nothing to say gets dropped.
  • +
  • Write timeout (10s) — a stalled reader can’t hold a +response buffer indefinitely.
  • +
+

A semaphore caps concurrent connections at 512 so a burst of +handshakes can’t exhaust the tokio runtime.

+

ALPN, the +cross-protocol defense that matters

+

If DoT lives on port 853 and HTTPS on 443, what stops an HTTP/2 +client from hitting 853 and getting confused replies? Cross-protocol attacks exist and +have had real CVEs. The defense is ALPN: during the TLS handshake the +client advertises protocols, the server picks one it supports or fails. +A DoT server advertises "dot"; a client offering only +"h2" gets a no_application_protocol fatal +alert before any frames are exchanged.

+

rustls enforces this by default when you set +alpn_protocols:

+
let mut config = ServerConfig::builder()
+    .with_no_client_auth()
+    .with_single_cert(certs, key)?;
+config.alpn_protocols = vec![b"dot".to_vec()];
+

“The library enforces it by default” has a latent risk: a future +rustls upgrade could change the default, and the defense would quietly +evaporate. I wrote a test that pins the behavior so any regression in a +dependency update fails loudly:

+
#[tokio::test]
+async fn dot_rejects_non_dot_alpn() {
+    let (addr, cert_der) = spawn_dot_server().await;
+    let client_config = dot_client(&cert_der, vec![b"h2".to_vec()]);
+    let connector = tokio_rustls::TlsConnector::from(client_config);
+    let tcp = tokio::net::TcpStream::connect(addr).await.unwrap();
+    let result = connector
+        .connect(ServerName::try_from("numa.numa").unwrap(), tcp)
+        .await;
+    assert!(result.is_err(),
+        "DoT server must reject ALPN that doesn't include \"dot\"");
+}
+

When you’re leaning on a library’s default for a security-critical +invariant, the test is the contract.

+

Two bugs that hid for days

+

Both were fixed before v0.10 shipped. Both stayed hidden because my +initial tests used permissive clients.

+

The rustls crypto provider +panic

+

rustls 0.23 requires a CryptoProvider installed before +you can build a ServerConfig. Numa’s HTTPS proxy calls +install_default as a side effect when it builds its own +config, so DoT “just worked” for users who enabled both — the proxy had +already initialized the provider before DoT’s first handshake.

+

Then I added support for user-provided DoT certificates. Someone +running DoT with their own Let’s Encrypt cert, with the HTTPS proxy +disabled, would hit:

+
thread 'dot' panicked at rustls-0.23.25/src/crypto/mod.rs:185:14:
+no process-level CryptoProvider available -- call
+CryptoProvider::install_default() before this point
+

The panic happened on the first client connection, not at startup. +While writing the integration suite for “DoT with BYO cert, proxy +disabled” — the one combination nobody had ever actually exercised — the +first run panicked. Fix is two lines: call install_default +inside load_tls_config so DoT can stand alone. If a side +effect initializes something and you have a path that skips that side +effect, you have a bug waiting for a specific deployment.

+

The SAN bug iOS was happy +to accept

+

Numa’s self-signed DoT cert is generated on first run from a local CA +alongside the data directory. It needs to match whatever +ServerName the client sends as SNI. For the HTTPS proxy, +that’s the wildcard domain pattern *.numa (matching +frontend.numa, api.numa, etc.). I initially +reused the same SAN list for DoT: a wildcard *.numa and +nothing else.

+

On an iPhone this worked perfectly. Full browsing session, persistent +connections in the log, ad blocking active. I was about to merge when I +ran one last smoke test with kdig (GnuTLS-backed, from Knot DNS):

+
$ kdig @192.168.1.16 -p 853 +tls \
+    +tls-ca=/usr/local/var/numa/ca.pem \
+    +tls-hostname=numa.numa example.com A
+
+;; TLS, handshake failed (Error in the certificate.)
+

Huh.

+

RFC +6125 §6.4.3: a wildcard in a certificate’s DNS-ID matches exactly +one label. *.numa matches frontend.numa, but +not numa.numa, because the wildcard wants at least one +label to substitute and strict clients reject wildcards in the leftmost +label under single-label TLDs as ambiguous.

+

iOS’s TLS stack is lenient and accepts it. GnuTLS, NSS (Firefox), and +most non-Apple validators don’t. The fix is five lines — add an explicit +numa.numa SAN alongside the wildcard. But the lesson is the +one that stuck: I wrote a commit message saying “fix an iOS bug” and had +to rewrite it, because iOS was fine. The real bug was that every +GnuTLS/NSS-based client on the planet would have rejected the cert, and +I only found it by running one more test with a stricter tool.

+
+

Test with the strict client. The permissive client hides your +bugs.

+
+

Getting your phone onto it

+

A DoT server is useless without a way to point a phone at it. iOS +won’t let you type an IP and a server name into Settings directly — you +install a .mobileconfig profile that bundles the CA as a +trust anchor and the DNS settings in a single payload.

+

Numa ships a subcommand that builds one on the fly and serves it over +a QR code in the terminal:

+
$ numa setup-phone
+
+  Numa Phone Setup
+
+  Profile URL: http://192.168.1.10:8765/mobileconfig
+
+  ██████████████████████████████
+  ██                          ██
+  ██   [QR code rendered in   ██
+  ██    your terminal]        ██
+  ██                          ██
+  ██████████████████████████████
+
+  On your iPhone:
+    1. Open Camera, point at the QR code, tap the yellow banner
+    2. Allow the download when Safari asks
+    3. Open Settings — tap "Profile Downloaded" near the top
+       (or: Settings → General → VPN & Device Management → Numa DNS)
+    4. Tap Install (top right), enter passcode, Install again
+    5. Settings → General → About → Certificate Trust Settings
+       Toggle ON "Numa Local CA" — required for DoT to work
+

The same QR is available in the dashboard — click “Phone Setup” in +the header and the popover renders an SVG QR code pointing at the +mobileconfig URL. On mobile viewports it shows a direct download link +instead.

+

Numa dashboard with Phone Setup popover showing QR code and install instructions

+

Step 4 is non-negotiable. Even though the CA is bundled in the same +profile that installs the DNS settings, iOS still requires the user to +explicitly toggle trust in Certificate Trust Settings. It’s a deliberate +iOS policy to prevent profile-based trust injection — annoying, and +correct.

+

I’ve been dogfooding this since v0.10 shipped in early April. The +phone resolves through Numa over DoT whenever I’m home; persistent +connections are visible in the log as a single source port living +through dozens of queries. The one real caveat: if the laptop’s LAN IP +changes, the profile breaks. RFC 9462 DDR +fixes that — Numa can respond to _dns.resolver.arpa IN SVCB +with its current IP and iOS picks it up on each network join. Next piece +of work.

+

What I learned

+

RFC-level small, API-level hard. RFC 7858 is ten +pages. The framing is trivial. But the subtle stuff — ALPN, timeouts, +connection caps, handshake vs idle vs write deadlines, backoff on accept +errors — isn’t in the RFC. Miss any of it and you leak a DoS vector or a +protocol confusion hole.

+

Your test matrix is your security matrix. Both bugs +in this post were hidden by lenient clients. In both cases the strict +client — kdig, or a specific config combination — surfaced the bug +instantly. Pick test tools for strictness, not convenience. The moment +you find yourself thinking “but iOS accepts it,” stop and run kdig.

+

Don’t initialize global state via side effects. +“Module A installs a global, module B silently depends on it, disabling +A breaks B” is a bug pattern that keeps coming back. Fix: have module B +initialize its dependency explicitly, even if it means calling an +idempotent install_default twice. The dependency graph +should be local and obvious.

+

What’s next

+
    +
  • DoH server — shipped in v0.12.0. +POST /dns-query accepts RFC 8484 +wire-format queries, so Firefox/Chrome can point their built-in DoH at +Numa.
  • +
  • DoQ server (RFC 9250) — DNS over QUIC. Android 14+ +supports it natively.
  • +
  • DDR (RFC 9462) — auto-discovery via +_dns.resolver.arpa IN SVCB, so phones pick up a moved Numa +instance without the installed profile going stale.
  • +
+

The code is at github.com/razvandimescu/numa +— the DoT listener is in src/dot.rs +and the phone onboarding flow is in src/setup_phone.rs +and src/mobileconfig.rs. +MIT license.

+
+ + + + + -- 2.34.1 From 289f2b973b146bad2f30b4a7846c6ae91a7cb363 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 11 Apr 2026 14:10:13 +0300 Subject: [PATCH 009/139] chore: remove built blog HTML from tracking (built by CI) Co-Authored-By: Claude Opus 4.6 (1M context) --- site/blog/posts/dot-from-scratch.html | 553 -------------------------- 1 file changed, 553 deletions(-) delete mode 100644 site/blog/posts/dot-from-scratch.html diff --git a/site/blog/posts/dot-from-scratch.html b/site/blog/posts/dot-from-scratch.html deleted file mode 100644 index a620f3b..0000000 --- a/site/blog/posts/dot-from-scratch.html +++ /dev/null @@ -1,553 +0,0 @@ - - - - - -DNS-over-TLS from Scratch in Rust — Numa - - - - - - - - -
-
-

DNS-over-TLS from Scratch in Rust

- -
- -

The previous post -ended with “DoT — the last encrypted transport we don’t support.” This -post is about building it.

-

Numa now runs a DoT listener on port 853. My iPhone uses it as its -system resolver, so ad blocking, DNSSEC validation, and recursive -resolution follow my phone through the day. No cloud, no account, no -companion app — a self-signed cert, a .mobileconfig -profile, and a QR code in the terminal.

-

RFC 7858 is ten pages. The hard parts weren’t in the RFC. They were -in cross-protocol confusion defenses, a crypto-provider init gotcha that -only triggered in one specific config combination, and a certificate SAN -bug iOS was happy to accept and kdig immediately rejected. -This post is about those parts.

-

Why DoT when you already have -DoH?

-

Numa has shipped DoH since v0.1. Both protocols tunnel DNS over TLS; -DoH wraps queries in HTTP/2, DoT is DNS-over-TCP with TLS in front. Same -privacy guarantees, different wrapper.

-

The answer to “why both” is that phones ask for DoT by -name. iOS system DNS configures it with two fields (IP + server -name) instead of a URL template. Android 9+ “Private DNS” speaks DoT -natively. Linux stubs default to DoT. I wanted my phone on Numa without -installing anything on the phone itself, and DoT is the protocol iOS and -Android already speak for that.

-

The wire format is -refreshingly small

-

RFC 7858 is one sentence of wire protocol: DNS-over-TCP (RFC 1035 -§4.2.2) with TLS in front, on port 853. DNS-over-TCP has existed -since 1987 — a 2-byte length prefix followed by the DNS message. DoT is -that, wrapped in a TLS session. The entire framing code is seven -lines:

-
async fn write_framed<S>(stream: &mut S, msg: &[u8]) -> io::Result<()>
-where S: AsyncWriteExt + Unpin {
-    let mut out = Vec::with_capacity(2 + msg.len());
-    out.extend_from_slice(&(msg.len() as u16).to_be_bytes());
-    out.extend_from_slice(msg);
-    stream.write_all(&out).await?;
-    stream.flush().await
-}
-

Reads are symmetric: read_exact two bytes, convert to -u16, read_exact that many bytes. No HTTP -headers, no chunked encoding, no framing layer.

-

Persistent connections

-

A fresh TCP+TLS handshake is at least 3 RTTs — about 300ms on a 100ms -connection, 60× the cost of a UDP query. RFC 7858 §3.4 says clients -SHOULD reuse the TCP connection for multiple queries, and every real DoT -client does: iOS, Android, systemd, stubby. A single connection often -carries hundreds of queries.

-

Timing diagram comparing a DNS lookup over plain UDP (1 RTT), over DoT on a fresh connection (3 RTTs — TCP handshake, TLS 1.3 handshake, then the query), and over a reused DoT session (1 RTT, same as UDP).

-

The amortization point is the whole game. If you only ever do one -query per connection, DoT is roughly 3× slower than UDP and you should -not use it. If you reuse the same TLS session for a browsing session’s -worth of queries, the handshake is paid once and every subsequent query -is effectively free.

-

The server is a loop that reads a length-prefixed message, resolves -it, writes the response framed the same way, waits for the next one. -Three timeouts keep it honest:

-
    -
  • Handshake timeout (10s) — a slowloris that opens -TCP but never sends a ClientHello can’t pin a worker.
  • -
  • Idle timeout (30s) — a connected client with -nothing to say gets dropped.
  • -
  • Write timeout (10s) — a stalled reader can’t hold a -response buffer indefinitely.
  • -
-

A semaphore caps concurrent connections at 512 so a burst of -handshakes can’t exhaust the tokio runtime.

-

ALPN, the -cross-protocol defense that matters

-

If DoT lives on port 853 and HTTPS on 443, what stops an HTTP/2 -client from hitting 853 and getting confused replies? Cross-protocol attacks exist and -have had real CVEs. The defense is ALPN: during the TLS handshake the -client advertises protocols, the server picks one it supports or fails. -A DoT server advertises "dot"; a client offering only -"h2" gets a no_application_protocol fatal -alert before any frames are exchanged.

-

rustls enforces this by default when you set -alpn_protocols:

-
let mut config = ServerConfig::builder()
-    .with_no_client_auth()
-    .with_single_cert(certs, key)?;
-config.alpn_protocols = vec![b"dot".to_vec()];
-

“The library enforces it by default” has a latent risk: a future -rustls upgrade could change the default, and the defense would quietly -evaporate. I wrote a test that pins the behavior so any regression in a -dependency update fails loudly:

-
#[tokio::test]
-async fn dot_rejects_non_dot_alpn() {
-    let (addr, cert_der) = spawn_dot_server().await;
-    let client_config = dot_client(&cert_der, vec![b"h2".to_vec()]);
-    let connector = tokio_rustls::TlsConnector::from(client_config);
-    let tcp = tokio::net::TcpStream::connect(addr).await.unwrap();
-    let result = connector
-        .connect(ServerName::try_from("numa.numa").unwrap(), tcp)
-        .await;
-    assert!(result.is_err(),
-        "DoT server must reject ALPN that doesn't include \"dot\"");
-}
-

When you’re leaning on a library’s default for a security-critical -invariant, the test is the contract.

-

Two bugs that hid for days

-

Both were fixed before v0.10 shipped. Both stayed hidden because my -initial tests used permissive clients.

-

The rustls crypto provider -panic

-

rustls 0.23 requires a CryptoProvider installed before -you can build a ServerConfig. Numa’s HTTPS proxy calls -install_default as a side effect when it builds its own -config, so DoT “just worked” for users who enabled both — the proxy had -already initialized the provider before DoT’s first handshake.

-

Then I added support for user-provided DoT certificates. Someone -running DoT with their own Let’s Encrypt cert, with the HTTPS proxy -disabled, would hit:

-
thread 'dot' panicked at rustls-0.23.25/src/crypto/mod.rs:185:14:
-no process-level CryptoProvider available -- call
-CryptoProvider::install_default() before this point
-

The panic happened on the first client connection, not at startup. -While writing the integration suite for “DoT with BYO cert, proxy -disabled” — the one combination nobody had ever actually exercised — the -first run panicked. Fix is two lines: call install_default -inside load_tls_config so DoT can stand alone. If a side -effect initializes something and you have a path that skips that side -effect, you have a bug waiting for a specific deployment.

-

The SAN bug iOS was happy -to accept

-

Numa’s self-signed DoT cert is generated on first run from a local CA -alongside the data directory. It needs to match whatever -ServerName the client sends as SNI. For the HTTPS proxy, -that’s the wildcard domain pattern *.numa (matching -frontend.numa, api.numa, etc.). I initially -reused the same SAN list for DoT: a wildcard *.numa and -nothing else.

-

On an iPhone this worked perfectly. Full browsing session, persistent -connections in the log, ad blocking active. I was about to merge when I -ran one last smoke test with kdig (GnuTLS-backed, from Knot DNS):

-
$ kdig @192.168.1.16 -p 853 +tls \
-    +tls-ca=/usr/local/var/numa/ca.pem \
-    +tls-hostname=numa.numa example.com A
-
-;; TLS, handshake failed (Error in the certificate.)
-

Huh.

-

RFC -6125 §6.4.3: a wildcard in a certificate’s DNS-ID matches exactly -one label. *.numa matches frontend.numa, but -not numa.numa, because the wildcard wants at least one -label to substitute and strict clients reject wildcards in the leftmost -label under single-label TLDs as ambiguous.

-

iOS’s TLS stack is lenient and accepts it. GnuTLS, NSS (Firefox), and -most non-Apple validators don’t. The fix is five lines — add an explicit -numa.numa SAN alongside the wildcard. But the lesson is the -one that stuck: I wrote a commit message saying “fix an iOS bug” and had -to rewrite it, because iOS was fine. The real bug was that every -GnuTLS/NSS-based client on the planet would have rejected the cert, and -I only found it by running one more test with a stricter tool.

-
-

Test with the strict client. The permissive client hides your -bugs.

-
-

Getting your phone onto it

-

A DoT server is useless without a way to point a phone at it. iOS -won’t let you type an IP and a server name into Settings directly — you -install a .mobileconfig profile that bundles the CA as a -trust anchor and the DNS settings in a single payload.

-

Numa ships a subcommand that builds one on the fly and serves it over -a QR code in the terminal:

-
$ numa setup-phone
-
-  Numa Phone Setup
-
-  Profile URL: http://192.168.1.10:8765/mobileconfig
-
-  ██████████████████████████████
-  ██                          ██
-  ██   [QR code rendered in   ██
-  ██    your terminal]        ██
-  ██                          ██
-  ██████████████████████████████
-
-  On your iPhone:
-    1. Open Camera, point at the QR code, tap the yellow banner
-    2. Allow the download when Safari asks
-    3. Open Settings — tap "Profile Downloaded" near the top
-       (or: Settings → General → VPN & Device Management → Numa DNS)
-    4. Tap Install (top right), enter passcode, Install again
-    5. Settings → General → About → Certificate Trust Settings
-       Toggle ON "Numa Local CA" — required for DoT to work
-

The same QR is available in the dashboard — click “Phone Setup” in -the header and the popover renders an SVG QR code pointing at the -mobileconfig URL. On mobile viewports it shows a direct download link -instead.

-

Numa dashboard with Phone Setup popover showing QR code and install instructions

-

Step 4 is non-negotiable. Even though the CA is bundled in the same -profile that installs the DNS settings, iOS still requires the user to -explicitly toggle trust in Certificate Trust Settings. It’s a deliberate -iOS policy to prevent profile-based trust injection — annoying, and -correct.

-

I’ve been dogfooding this since v0.10 shipped in early April. The -phone resolves through Numa over DoT whenever I’m home; persistent -connections are visible in the log as a single source port living -through dozens of queries. The one real caveat: if the laptop’s LAN IP -changes, the profile breaks. RFC 9462 DDR -fixes that — Numa can respond to _dns.resolver.arpa IN SVCB -with its current IP and iOS picks it up on each network join. Next piece -of work.

-

What I learned

-

RFC-level small, API-level hard. RFC 7858 is ten -pages. The framing is trivial. But the subtle stuff — ALPN, timeouts, -connection caps, handshake vs idle vs write deadlines, backoff on accept -errors — isn’t in the RFC. Miss any of it and you leak a DoS vector or a -protocol confusion hole.

-

Your test matrix is your security matrix. Both bugs -in this post were hidden by lenient clients. In both cases the strict -client — kdig, or a specific config combination — surfaced the bug -instantly. Pick test tools for strictness, not convenience. The moment -you find yourself thinking “but iOS accepts it,” stop and run kdig.

-

Don’t initialize global state via side effects. -“Module A installs a global, module B silently depends on it, disabling -A breaks B” is a bug pattern that keeps coming back. Fix: have module B -initialize its dependency explicitly, even if it means calling an -idempotent install_default twice. The dependency graph -should be local and obvious.

-

What’s next

-
    -
  • DoH server — shipped in v0.12.0. -POST /dns-query accepts RFC 8484 -wire-format queries, so Firefox/Chrome can point their built-in DoH at -Numa.
  • -
  • DoQ server (RFC 9250) — DNS over QUIC. Android 14+ -supports it natively.
  • -
  • DDR (RFC 9462) — auto-discovery via -_dns.resolver.arpa IN SVCB, so phones pick up a moved Numa -instance without the installed profile going stale.
  • -
-

The code is at github.com/razvandimescu/numa -— the DoT listener is in src/dot.rs -and the phone onboarding flow is in src/setup_phone.rs -and src/mobileconfig.rs. -MIT license.

-
- - - - - -- 2.34.1 From 22bebb85a0ce0a49d897770c74b89db6a94f610b Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 02:17:33 +0300 Subject: [PATCH 010/139] fix: config path advisory ignores XDG file on interactive root (#81) (#83) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port-53 and TLS-data-dir advisories told users to create ~/.config/numa/numa.toml, but config_dir() routed root to /var/lib/numa/ and load_config never consulted the XDG path, so the file the user created was silently ignored. New suggested_config_path() helper prefers $HOME/.config/numa/ when HOME is set (and isn't "/" or empty), with config_dir() as lazy fallback. Used by both advisories and by load_config as an additional candidate, so the advised path is the path numa actually reads. Runtime state (services.json, TLS CA) stays in FHS — config_dir()/data_dir() are intentionally unchanged to keep continuity with the installed daemon. End-to-end replication + regression check in tests/docker/issue-81.sh: four scenarios (replication and existing-install, each against main and fix), all matching expectations. --- src/config.rs | 13 ++-- src/lib.rs | 105 +++++++++++++++++++++++++ src/system_dns.rs | 5 +- src/tls.rs | 5 +- tests/docker/hold53.py | 5 ++ tests/docker/issue-81.sh | 164 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 288 insertions(+), 9 deletions(-) create mode 100644 tests/docker/hold53.py create mode 100755 tests/docker/issue-81.sh diff --git a/src/config.rs b/src/config.rs index 6480883..60b505e 100644 --- a/src/config.rs +++ b/src/config.rs @@ -612,6 +612,13 @@ pub fn load_config(path: &str) -> Result { let filename = p.file_name().unwrap_or(p.as_os_str()); v.push(crate::config_dir().join(filename)); v.push(crate::data_dir().join(filename)); + // Interactive root and sudo'd users: always consult the XDG path + // so `touch ~/.config/numa/numa.toml` works regardless of whether + // config_dir() routed to FHS (issue #81). + let suggested = crate::suggested_config_path(); + if !v.contains(&suggested) { + v.push(suggested); + } } v }; @@ -632,11 +639,7 @@ pub fn load_config(path: &str) -> Result { } } - // Show config_dir candidate as the "expected" path — it's actionable - let display_path = candidates - .get(1) - .map(|p| p.to_string_lossy().to_string()) - .unwrap_or_else(|| resolve_path(path)); + let display_path = crate::suggested_config_path().to_string_lossy().to_string(); log::info!("config not found, using defaults (create {})", display_path); Ok(ConfigLoad { config: Config::default(), diff --git a/src/lib.rs b/src/lib.rs index be71125..4074020 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -44,6 +44,42 @@ pub fn hostname() -> String { .unwrap_or_else(|| "numa".to_string()) } +/// Path to suggest to an interactive user when asking them to create +/// `numa.toml`. Prefers `$HOME/.config/numa/numa.toml` when HOME is set +/// (actionable without sudo); falls back to `config_dir()` otherwise. +/// +/// Note: `config_dir()` routes interactive root to FHS (`/var/lib/numa`) +/// so that runtime state like `services.json` stays continuous with the +/// installed daemon. This helper exists specifically to give advisories +/// and `load_config` an XDG-aware path for user-authored config, without +/// moving runtime state out of FHS — see issue #81. +pub(crate) fn suggested_config_path() -> std::path::PathBuf { + #[cfg(not(windows))] + { + resolve_suggested_config_path(std::env::var("HOME").ok().as_deref(), config_dir) + } + #[cfg(windows)] + { + config_dir().join("numa.toml") + } +} + +#[cfg(not(windows))] +fn resolve_suggested_config_path(home: Option<&str>, fallback_dir: F) -> std::path::PathBuf +where + F: FnOnce() -> std::path::PathBuf, +{ + if let Some(home) = home { + if !home.is_empty() && home != "/" { + return std::path::PathBuf::from(home) + .join(".config") + .join("numa") + .join("numa.toml"); + } + } + fallback_dir().join("numa.toml") +} + /// Shared config directory for persistent data (services.json, etc). /// Unix users: ~/.config/numa/ /// Linux root daemon: /var/lib/numa (FHS) — falls back to /usr/local/var/numa @@ -163,4 +199,73 @@ mod tests { fn linux_data_dir_only_fhs_uses_fhs() { assert_eq!(resolve_linux_data_dir(false, true), "/var/lib/numa"); } + + #[cfg(not(windows))] + fn fhs() -> std::path::PathBuf { + std::path::PathBuf::from("/var/lib/numa") + } + + #[cfg(not(windows))] + #[test] + fn suggested_config_path_prefers_home() { + assert_eq!( + resolve_suggested_config_path(Some("/home/alice"), fhs), + std::path::PathBuf::from("/home/alice/.config/numa/numa.toml"), + ); + } + + #[cfg(not(windows))] + #[test] + fn suggested_config_path_prefers_root_home_over_fhs() { + // Interactive root: HOME=/root is a real user context, not a daemon signal. + // Advisory must point where load_config will actually look — issue #81. + assert_eq!( + resolve_suggested_config_path(Some("/root"), fhs), + std::path::PathBuf::from("/root/.config/numa/numa.toml"), + ); + } + + #[cfg(not(windows))] + #[test] + fn suggested_config_path_falls_back_when_home_unset() { + assert_eq!( + resolve_suggested_config_path(None, fhs), + std::path::PathBuf::from("/var/lib/numa/numa.toml"), + ); + } + + #[cfg(not(windows))] + #[test] + fn suggested_config_path_falls_back_when_home_is_root() { + // systemd services sometimes have HOME=/ — don't treat that as a real home. + assert_eq!( + resolve_suggested_config_path(Some("/"), fhs), + std::path::PathBuf::from("/var/lib/numa/numa.toml"), + ); + } + + #[cfg(not(windows))] + #[test] + fn suggested_config_path_falls_back_when_home_is_empty() { + assert_eq!( + resolve_suggested_config_path(Some(""), fhs), + std::path::PathBuf::from("/var/lib/numa/numa.toml"), + ); + } + + #[cfg(not(windows))] + #[test] + fn suggested_config_path_skips_fallback_when_home_valid() { + // Happy path shouldn't probe the filesystem via config_dir(). + let called = std::cell::Cell::new(false); + let fallback = || { + called.set(true); + std::path::PathBuf::from("/should/not/be/used") + }; + let _ = resolve_suggested_config_path(Some("/home/alice"), fallback); + assert!( + !called.get(), + "fallback must not be invoked when HOME is valid" + ); + } } diff --git a/src/system_dns.rs b/src/system_dns.rs index 115ce2d..539f0a1 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -91,7 +91,7 @@ pub fn try_port53_advisory(bind_addr: &str, err: &std::io::Error) -> Option Option Option&1 | tail -1 +if ! command -v cargo &>/dev/null; then + curl -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --quiet +fi +. "$HOME/.cargo/env" + +build_from() { + local label="$1"; local src="$2" + mkdir -p "/work/$label" + tar -C "$src" --exclude=./target --exclude=./.git -cf - . | tar -C "/work/$label" -xf - + (cd "/work/$label" && cargo build --release --locked 2>&1 | tail -1) + cp "/work/$label/target/release/numa" "/work/numa-$label" +} + +build_from main /main +build_from fix /fix + +holder=0 +stop_holder() { + if [ "$holder" -ne 0 ]; then + kill "$holder" 2>/dev/null || true + wait "$holder" 2>/dev/null || true + holder=0 + fi +} +trap stop_holder EXIT + +start_holder() { + python3 /tmp/hold53.py & + holder=$! + sleep 0.3 +} + +write_test_config() { + local path="$1" + mkdir -p "$(dirname "$path")" + cat > "$path" < /tmp/run1.txt 2>&1 + set -e + echo "── step 1: advisory printed by $label ──" + grep -E "Create .* with:" /tmp/run1.txt | sed "s/^/ /" || echo " " + + write_test_config "$XDG_CONFIG" + echo "── step 2: wrote config at $XDG_CONFIG ──" + + set +e + timeout 3 "$bin" > /tmp/run2.txt 2>&1 + set -e + stop_holder + + verdict "$label" "$expected" /tmp/run2.txt +} + +scenario_existing_install() { + local label="$1"; local bin="/work/numa-$label" + echo + echo "════════ EXISTING INSTALL / $label ════════" + rm -rf /root/.config/numa /var/lib/numa + write_test_config "$FHS_CONFIG" + + start_holder + set +e + timeout 3 "$bin" > /tmp/run.txt 2>&1 + set -e + stop_holder + + verdict "$label" "bound" /tmp/run.txt +} + +RC=0 +scenario_replication main ignored || RC=1 +scenario_replication fix bound || RC=1 +scenario_existing_install main || RC=1 +scenario_existing_install fix || RC=1 + +echo +if [ "$RC" -eq 0 ]; then + echo "── all scenarios matched expectations ──" +else + echo "── FAILURE: one or more scenarios diverged ──" +fi +exit $RC +' -- 2.34.1 From 7047767dc225ca56216eb677f4f312582027106b Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 06:12:08 +0300 Subject: [PATCH 011/139] feat: per-suffix conditional forwarding rules (#82) (#84) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: per-suffix conditional forwarding rules in numa.toml (#82) Adds a `[[forwarding]]` config section so users can explicitly route domain suffixes to specific upstreams. Config-declared rules take precedence over auto-discovered rules (macOS scutil, Linux search domains) via first-match semantics. Example — the reporter's reverse-DNS case: [[forwarding]] suffix = "168.192.in-addr.arpa" upstream = "100.90.1.63:5361" Bare IPs default to port 53. IPv6 is supported via parse_upstream_addr. ForwardingRule::new() constructor replaces direct struct-literal construction, and make_rule() now delegates to parse_upstream_addr to fix a latent IPv6 parsing bug. * feat: accept suffix as string or array in [[forwarding]] rules Reuses existing string_or_vec deserializer so users can write: suffix = ["168.192.in-addr.arpa", "onsite"] instead of repeating [[forwarding]] blocks per suffix. * style: rustfmt * refactor: drop config_count from merge_forwarding_rules return Log config rules directly from config.forwarding before merging, keeping the merge API clean of logging concerns. --- numa.toml | 8 ++ src/config.rs | 184 ++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 8 +- src/system_dns.rs | 19 +++-- 4 files changed, 212 insertions(+), 7 deletions(-) diff --git a/numa.toml b/numa.toml index 92b5411..3b716e8 100644 --- a/numa.toml +++ b/numa.toml @@ -45,6 +45,14 @@ api_port = 5380 # "co", "br", "au", "ca", "jp", # other major ccTLDs # ] +# [[forwarding]] # per-suffix conditional forwarding rules +# suffix = "168.192.in-addr.arpa" # single suffix → one upstream +# upstream = "100.90.1.63:5361" +# +# [[forwarding]] +# suffix = ["home.local", "home.arpa"] # multiple suffixes → same upstream +# upstream = "10.0.0.1" # port 53 default + # [blocking] # enabled = true # set to false to disable ad blocking # refresh_hours = 24 diff --git a/src/config.rs b/src/config.rs index 60b505e..ae9f685 100644 --- a/src/config.rs +++ b/src/config.rs @@ -33,6 +33,39 @@ pub struct Config { pub dot: DotConfig, #[serde(default)] pub mobile: MobileConfig, + #[serde(default)] + pub forwarding: Vec, +} + +#[derive(Deserialize, Clone, Debug)] +pub struct ForwardingRuleConfig { + #[serde(deserialize_with = "string_or_vec")] + pub suffix: Vec, + pub upstream: String, +} + +impl ForwardingRuleConfig { + fn to_runtime_rules(&self) -> Result> { + let addr = crate::forward::parse_upstream_addr(&self.upstream, 53) + .map_err(|e| format!("forwarding rule for upstream '{}': {}", self.upstream, e))?; + Ok(self + .suffix + .iter() + .map(|s| crate::system_dns::ForwardingRule::new(s.clone(), addr)) + .collect()) + } +} + +pub fn merge_forwarding_rules( + config_rules: &[ForwardingRuleConfig], + discovered: Vec, +) -> Result> { + let mut merged: Vec = Vec::new(); + for rule in config_rules { + merged.extend(rule.to_runtime_rules()?); + } + merged.extend(discovered); + Ok(merged) } #[derive(Deserialize)] @@ -585,6 +618,157 @@ mod tests { assert!(config.upstream.address.is_empty()); assert!(config.upstream.fallback.is_empty()); } + + // ── issue #82: [[forwarding]] config section ──────────────────────── + + #[test] + fn forwarding_empty_by_default() { + let config: Config = toml::from_str("").unwrap(); + assert!(config.forwarding.is_empty()); + } + + #[test] + fn forwarding_parses_single_rule() { + let toml = r#" + [[forwarding]] + suffix = "home.local" + upstream = "100.90.1.63:5361" + "#; + let config: Config = toml::from_str(toml).unwrap(); + assert_eq!(config.forwarding.len(), 1); + assert_eq!(config.forwarding[0].suffix, &["home.local"]); + assert_eq!(config.forwarding[0].upstream, "100.90.1.63:5361"); + } + + #[test] + fn forwarding_parses_reverse_dns_zone() { + let toml = r#" + [[forwarding]] + suffix = "168.192.in-addr.arpa" + upstream = "100.90.1.63:5361" + "#; + let config: Config = toml::from_str(toml).unwrap(); + assert_eq!(config.forwarding.len(), 1); + assert_eq!(config.forwarding[0].suffix, &["168.192.in-addr.arpa"]); + } + + #[test] + fn forwarding_parses_multiple_rules() { + let toml = r#" + [[forwarding]] + suffix = "168.192.in-addr.arpa" + upstream = "100.90.1.63:5361" + + [[forwarding]] + suffix = "home.local" + upstream = "10.0.0.1" + "#; + let config: Config = toml::from_str(toml).unwrap(); + assert_eq!(config.forwarding.len(), 2); + assert_eq!(config.forwarding[1].upstream, "10.0.0.1"); + } + + #[test] + fn forwarding_parses_suffix_array() { + let toml = r#" + [[forwarding]] + suffix = ["168.192.in-addr.arpa", "onsite"] + upstream = "192.168.88.1" + "#; + let config: Config = toml::from_str(toml).unwrap(); + assert_eq!(config.forwarding.len(), 1); + assert_eq!( + config.forwarding[0].suffix, + &["168.192.in-addr.arpa", "onsite"] + ); + } + + #[test] + fn forwarding_suffix_array_expands_to_multiple_runtime_rules() { + let rule = ForwardingRuleConfig { + suffix: vec!["168.192.in-addr.arpa".to_string(), "onsite".to_string()], + upstream: "192.168.88.1".to_string(), + }; + let runtime = rule.to_runtime_rules().unwrap(); + assert_eq!(runtime.len(), 2); + assert_eq!(runtime[0].suffix, "168.192.in-addr.arpa"); + assert_eq!(runtime[1].suffix, "onsite"); + assert_eq!(runtime[0].upstream, runtime[1].upstream); + } + + #[test] + fn forwarding_upstream_with_explicit_port() { + let rule = ForwardingRuleConfig { + suffix: vec!["home.local".to_string()], + upstream: "100.90.1.63:5361".to_string(), + }; + let runtime = rule.to_runtime_rules().unwrap(); + assert_eq!(runtime.len(), 1); + assert_eq!(runtime[0].upstream.to_string(), "100.90.1.63:5361"); + assert_eq!(runtime[0].suffix, "home.local"); + } + + #[test] + fn forwarding_upstream_defaults_to_port_53() { + let rule = ForwardingRuleConfig { + suffix: vec!["home.local".to_string()], + upstream: "100.90.1.63".to_string(), + }; + let runtime = rule.to_runtime_rules().unwrap(); + assert_eq!(runtime[0].upstream.to_string(), "100.90.1.63:53"); + } + + #[test] + fn forwarding_invalid_upstream_returns_error() { + let rule = ForwardingRuleConfig { + suffix: vec!["home.local".to_string()], + upstream: "not-a-valid-host".to_string(), + }; + assert!(rule.to_runtime_rules().is_err()); + } + + #[test] + fn forwarding_config_rules_take_precedence_over_discovered() { + let config_rules = vec![ForwardingRuleConfig { + suffix: vec!["home.local".to_string()], + upstream: "10.0.0.1:53".to_string(), + }]; + let discovered = vec![crate::system_dns::ForwardingRule::new( + "home.local".to_string(), + "192.168.1.1:53".parse().unwrap(), + )]; + let merged = merge_forwarding_rules(&config_rules, discovered).unwrap(); + let picked = crate::system_dns::match_forwarding_rule("host.home.local", &merged) + .expect("rule should match"); + assert_eq!(picked.to_string(), "10.0.0.1:53"); + } + + #[test] + fn forwarding_merge_preserves_non_overlapping_discovered() { + let config_rules = vec![ForwardingRuleConfig { + suffix: vec!["home.local".to_string()], + upstream: "10.0.0.1:53".to_string(), + }]; + let discovered = vec![crate::system_dns::ForwardingRule::new( + "corp.example".to_string(), + "192.168.1.1:53".parse().unwrap(), + )]; + let merged = merge_forwarding_rules(&config_rules, discovered).unwrap(); + assert_eq!(merged.len(), 2); + let picked = crate::system_dns::match_forwarding_rule("host.corp.example", &merged) + .expect("discovered rule should still match"); + assert_eq!(picked.to_string(), "192.168.1.1:53"); + } + + #[test] + fn forwarding_merge_suffix_array_expands_to_multiple_rules() { + let config_rules = vec![ForwardingRuleConfig { + suffix: vec!["a.local".to_string(), "b.local".to_string()], + upstream: "10.0.0.1:53".to_string(), + }]; + let merged = merge_forwarding_rules(&config_rules, vec![]).unwrap(); + assert_eq!(merged.len(), 2); + } } pub struct ConfigLoad { diff --git a/src/main.rs b/src/main.rs index 903be9a..7592186 100644 --- a/src/main.rs +++ b/src/main.rs @@ -210,7 +210,13 @@ async fn main() -> numa::Result<()> { } service_store.load_persisted(); - let forwarding_rules = system_dns.forwarding_rules; + for fwd in &config.forwarding { + for suffix in &fwd.suffix { + info!("forwarding .{} to {} (config rule)", suffix, fwd.upstream); + } + } + let forwarding_rules = + numa::config::merge_forwarding_rules(&config.forwarding, system_dns.forwarding_rules)?; // Resolve data_dir from config, falling back to the platform default. // Used for TLS CA storage below and stored on ServerCtx for runtime use. diff --git a/src/system_dns.rs b/src/system_dns.rs index 539f0a1..d560a6e 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -25,6 +25,17 @@ pub struct ForwardingRule { pub upstream: SocketAddr, } +impl ForwardingRule { + pub fn new(suffix: String, upstream: SocketAddr) -> Self { + let dot_suffix = format!(".{}", suffix); + Self { + suffix, + dot_suffix, + upstream, + } + } +} + /// Result of system DNS discovery — default upstream + conditional forwarding rules. pub struct SystemDnsInfo { pub default_upstream: Option, @@ -221,12 +232,8 @@ fn discover_macos() -> SystemDnsInfo { #[cfg(any(target_os = "macos", target_os = "linux"))] fn make_rule(domain: &str, nameserver: &str) -> Option { - let addr: SocketAddr = format!("{}:53", nameserver).parse().ok()?; - Some(ForwardingRule { - dot_suffix: format!(".{}", domain), - suffix: domain.to_string(), - upstream: addr, - }) + let addr = crate::forward::parse_upstream_addr(nameserver, 53).ok()?; + Some(ForwardingRule::new(domain.to_string(), addr)) } #[cfg(target_os = "linux")] -- 2.34.1 From 05baad0cc0b8c41df9a1bc6d1d8395733dc970f0 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 18:35:06 +0300 Subject: [PATCH 012/139] feat: DoT (DNS over TLS) client upstream Adds tls:// upstream support for forwarding queries over DNS-over-TLS (RFC 7858). Parses tls://IP:PORT#hostname format, with default port 853. - New Upstream::Dot variant with TLS connector - forward_dot: length-prefixed DNS over TLS stream - build_dot_connector: system root CAs via webpki-roots - parse_upstream handles tls:// prefix Example config: address = ["tls://9.9.9.9#dns.quad9.net"] --- Cargo.lock | 1 + Cargo.toml | 1 + src/forward.rs | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 86f96da..c7cd38b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1170,6 +1170,7 @@ dependencies = [ "tokio-rustls", "toml", "tower", + "webpki-roots", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index aa67dd4..c5d5e1d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ arc-swap = "1" ring = "0.17" rustls-pemfile = "2.2.0" qrcode = { version = "0.14", default-features = false, features = ["svg"] } +webpki-roots = "1" [dev-dependencies] criterion = { version = "0.8", features = ["html_reports"] } diff --git a/src/forward.rs b/src/forward.rs index 78efcb9..ea2f1e2 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -18,6 +18,11 @@ pub enum Upstream { url: String, client: reqwest::Client, }, + Dot { + addr: SocketAddr, + tls_name: Option, + connector: tokio_rustls::TlsConnector, + }, } impl PartialEq for Upstream { @@ -25,6 +30,7 @@ impl PartialEq for Upstream { match (self, other) { (Self::Udp(a), Self::Udp(b)) => a == b, (Self::Doh { url: a, .. }, Self::Doh { url: b, .. }) => a == b, + (Self::Dot { addr: a, .. }, Self::Dot { addr: b, .. }) => a == b, _ => false, } } @@ -35,6 +41,10 @@ impl fmt::Display for Upstream { match self { Upstream::Udp(addr) => write!(f, "{}", addr), Upstream::Doh { url, .. } => f.write_str(url), + Upstream::Dot { addr, tls_name, .. } => match tls_name { + Some(name) => write!(f, "tls://{}#{}", addr, name), + None => write!(f, "tls://{}", addr), + }, } } } @@ -62,10 +72,36 @@ pub fn parse_upstream(s: &str, default_port: u16) -> Result { client, }); } + // tls://IP:PORT#hostname or tls://IP#hostname (default port 853) + if let Some(rest) = s.strip_prefix("tls://") { + let (addr_part, tls_name) = match rest.find('#') { + Some(i) => (&rest[..i], Some(rest[i + 1..].to_string())), + None => (rest, None), + }; + let addr = parse_upstream_addr(addr_part, 853)?; + let connector = build_dot_connector()?; + return Ok(Upstream::Dot { + addr, + tls_name, + connector, + }); + } let addr = parse_upstream_addr(s, default_port)?; Ok(Upstream::Udp(addr)) } +fn build_dot_connector() -> Result { + let _ = rustls::crypto::ring::default_provider().install_default(); + let mut root_store = rustls::RootCertStore::empty(); + root_store.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned()); + let config = rustls::ClientConfig::builder() + .with_root_certificates(root_store) + .with_no_client_auth(); + Ok(tokio_rustls::TlsConnector::from(std::sync::Arc::new( + config, + ))) +} + #[derive(Clone)] pub struct UpstreamPool { primary: Vec, @@ -174,6 +210,11 @@ pub async fn forward_query( match upstream { Upstream::Udp(addr) => forward_udp(query, *addr, timeout_duration).await, Upstream::Doh { url, client } => forward_doh(query, url, client, timeout_duration).await, + Upstream::Dot { + addr, + tls_name, + connector, + } => forward_dot(query, *addr, tls_name, connector, timeout_duration).await, } } @@ -236,6 +277,45 @@ pub(crate) async fn forward_tcp( DnsPacket::from_buffer(&mut recv_buffer) } +async fn forward_dot( + query: &DnsPacket, + addr: SocketAddr, + tls_name: &Option, + connector: &tokio_rustls::TlsConnector, + timeout_duration: Duration, +) -> Result { + use rustls::pki_types::ServerName; + use tokio::io::{AsyncReadExt, AsyncWriteExt}; + use tokio::net::TcpStream; + + let server_name = match tls_name { + Some(name) => ServerName::try_from(name.clone())?, + None => ServerName::try_from(addr.ip().to_string())?, + }; + + let tcp = timeout(timeout_duration, TcpStream::connect(addr)).await??; + let mut tls = timeout(timeout_duration, connector.connect(server_name, tcp)).await??; + + let mut send_buffer = BytePacketBuffer::new(); + query.write(&mut send_buffer)?; + let wire = send_buffer.filled(); + + let mut outbuf = Vec::with_capacity(2 + wire.len()); + outbuf.extend_from_slice(&(wire.len() as u16).to_be_bytes()); + outbuf.extend_from_slice(wire); + timeout(timeout_duration, tls.write_all(&outbuf)).await??; + + let mut len_buf = [0u8; 2]; + timeout(timeout_duration, tls.read_exact(&mut len_buf)).await??; + let resp_len = u16::from_be_bytes(len_buf) as usize; + + let mut data = vec![0u8; resp_len]; + timeout(timeout_duration, tls.read_exact(&mut data)).await??; + + let mut recv_buffer = BytePacketBuffer::from_bytes(&data); + DnsPacket::from_buffer(&mut recv_buffer) +} + async fn forward_doh( query: &DnsPacket, url: &str, -- 2.34.1 From 7efac85836bacd483e174e5509fad03bac3f548f Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 04:20:18 +0300 Subject: [PATCH 013/139] feat: wire-level forwarding, cache, request hedging, and DoH keepalive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire-level forwarding path skips DnsPacket parse/serialize on the hot path. Cache stores raw wire bytes with pre-scanned TTL offsets — patches ID + TTLs in-place on lookup instead of cloning parsed packets. Request hedging (Dean & Barroso "Tail at Scale") fires a second parallel request after a configurable delay (default 10ms) when the primary upstream stalls. DoH keepalive loop prevents idle HTTP/2 + TLS connection teardown. Recursive resolver now hedges across multiple NS addresses and caches NS delegation records to skip TLD re-queries. Integration test harness polls /blocking/stats instead of fixed sleep, eliminating the blocklist-download race condition. --- Cargo.lock | 458 +++++++++- Cargo.toml | 6 + benches/numa-bench.toml | 25 + benches/recursive_compare.rs | 1649 ++++++++++++++++++++++++++++++++++ scripts/bench-recursive.sh | 115 +++ src/api.rs | 1 + src/cache.rs | 177 ++-- src/config.rs | 6 + src/ctx.rs | 47 +- src/doh.rs | 11 +- src/dot.rs | 6 +- src/forward.rs | 186 +++- src/lib.rs | 1 + src/main.rs | 26 +- src/recursive.rs | 123 ++- src/srtt.rs | 5 + src/wire.rs | 1347 +++++++++++++++++++++++++++ tests/integration.sh | 12 +- 18 files changed, 4091 insertions(+), 110 deletions(-) create mode 100644 benches/numa-bench.toml create mode 100644 benches/recursive_compare.rs create mode 100755 scripts/bench-recursive.sh create mode 100644 src/wire.rs diff --git a/Cargo.lock b/Cargo.lock index c7cd38b..eaba214 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -82,6 +82,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + [[package]] name = "arc-swap" version = "1.9.0" @@ -142,6 +148,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -410,6 +427,21 @@ dependencies = [ "itertools", ] +[[package]] +name = "critical-section" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b" + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -493,6 +525,18 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "enum-as-inner" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "env_filter" version = "1.0.1" @@ -554,6 +598,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -679,11 +729,24 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "r-efi", + "r-efi 5.3.0", "wasip2", "wasm-bindgen", ] +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + [[package]] name = "h2" version = "0.4.13" @@ -714,12 +777,82 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + [[package]] name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hickory-proto" +version = "0.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8a6fe56c0038198998a6f217ca4e7ef3a5e51f46163bd6dd60b5c71ca6c6502" +dependencies = [ + "async-trait", + "bytes", + "cfg-if", + "data-encoding", + "enum-as-inner", + "futures-channel", + "futures-io", + "futures-util", + "h2", + "http", + "idna", + "ipnet", + "once_cell", + "rand", + "ring", + "rustls", + "thiserror", + "tinyvec", + "tokio", + "tokio-rustls", + "tracing", + "url", + "webpki-roots 0.26.11", +] + +[[package]] +name = "hickory-resolver" +version = "0.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc62a9a99b0bfb44d2ab95a7208ac952d31060efc16241c87eaf36406fecf87a" +dependencies = [ + "cfg-if", + "futures-util", + "hickory-proto", + "ipconfig", + "moka", + "once_cell", + "parking_lot", + "rand", + "resolv-conf", + "rustls", + "smallvec", + "thiserror", + "tokio", + "tokio-rustls", + "tracing", + "webpki-roots 0.26.11", +] + [[package]] name = "http" version = "1.4.0" @@ -802,7 +935,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", - "webpki-roots", + "webpki-roots 1.0.6", ] [[package]] @@ -909,6 +1042,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "idna" version = "1.1.0" @@ -937,7 +1076,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + +[[package]] +name = "ipconfig" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d40460c0ce33d6ce4b0630ad68ff63d6661961c48b6dba35e5a4d81cfb48222" +dependencies = [ + "socket2", + "widestring", + "windows-registry", + "windows-result", + "windows-sys 0.61.2", ] [[package]] @@ -1029,6 +1183,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "libc" version = "0.2.183" @@ -1041,6 +1201,15 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + [[package]] name = "log" version = "0.4.29" @@ -1098,6 +1267,23 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "moka" +version = "0.12.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046" +dependencies = [ + "crossbeam-channel", + "crossbeam-epoch", + "crossbeam-utils", + "equivalent", + "parking_lot", + "portable-atomic", + "smallvec", + "tagptr", + "uuid", +] + [[package]] name = "nom" version = "7.1.3" @@ -1151,6 +1337,8 @@ dependencies = [ "criterion", "env_logger", "futures", + "hickory-proto", + "hickory-resolver", "http", "http-body-util", "hyper", @@ -1187,6 +1375,10 @@ name = "once_cell" version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" +dependencies = [ + "critical-section", + "portable-atomic", +] [[package]] name = "once_cell_polyfill" @@ -1210,6 +1402,29 @@ dependencies = [ "winapi", ] +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + [[package]] name = "pem" version = "3.0.6" @@ -1305,6 +1520,16 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.106" @@ -1390,6 +1615,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "rand" version = "0.9.2" @@ -1453,6 +1684,15 @@ dependencies = [ "yasna", ] +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + [[package]] name = "regex" version = "1.12.3" @@ -1518,9 +1758,15 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "webpki-roots", + "webpki-roots 1.0.6", ] +[[package]] +name = "resolv-conf" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e061d1b48cb8d38042de4ae0a7a6401009d6143dc80d2e2d6f31f0bdd6470c7" + [[package]] name = "ring" version = "0.17.14" @@ -1618,6 +1864,18 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + [[package]] name = "serde" version = "1.0.228" @@ -1780,6 +2038,12 @@ dependencies = [ "syn", ] +[[package]] +name = "tagptr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" + [[package]] name = "thiserror" version = "2.0.18" @@ -2038,6 +2302,12 @@ version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "untrusted" version = "0.9.0" @@ -2068,6 +2338,17 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "uuid" +version = "1.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9" +dependencies = [ + "getrandom 0.4.2", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "walkdir" version = "2.5.0" @@ -2102,6 +2383,15 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + [[package]] name = "wasm-bindgen" version = "0.2.115" @@ -2157,6 +2447,40 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + [[package]] name = "web-sys" version = "0.3.92" @@ -2177,6 +2501,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.6", +] + [[package]] name = "webpki-roots" version = "1.0.6" @@ -2186,6 +2519,12 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "widestring" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471" + [[package]] name = "winapi" version = "0.3.9" @@ -2223,6 +2562,35 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-registry" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" +dependencies = [ + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.52.0" @@ -2390,6 +2758,88 @@ name = "wit-bindgen" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] [[package]] name = "writeable" diff --git a/Cargo.toml b/Cargo.toml index c5d5e1d..d7f6f9f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,8 @@ webpki-roots = "1" criterion = { version = "0.8", features = ["html_reports"] } tower = { version = "0.5", features = ["util"] } http = "1" +hickory-resolver = { version = "0.25", features = ["https-ring", "webpki-roots"] } +hickory-proto = "0.25" [[bench]] name = "hot_path" @@ -49,3 +51,7 @@ harness = false [[bench]] name = "dnssec" harness = false + +[[bench]] +name = "recursive_compare" +harness = false diff --git a/benches/numa-bench.toml b/benches/numa-bench.toml new file mode 100644 index 0000000..0e058af --- /dev/null +++ b/benches/numa-bench.toml @@ -0,0 +1,25 @@ +[server] +bind_addr = "127.0.0.1:5454" +api_port = 5381 +api_bind_addr = "127.0.0.1" +data_dir = "/tmp/numa-bench" + +[upstream] +mode = "recursive" +timeout_ms = 10000 + +[cache] +min_ttl = 60 +max_ttl = 3600 + +[blocking] +enabled = false + +[dot] +enabled = false + +[mobile] +enabled = false + +[lan] +enabled = false diff --git a/benches/recursive_compare.rs b/benches/recursive_compare.rs new file mode 100644 index 0000000..e35768c --- /dev/null +++ b/benches/recursive_compare.rs @@ -0,0 +1,1649 @@ +//! DoH forwarding benchmark: Numa vs hickory-resolver. +//! +//! Both forward to the same DoH upstream (Quad9). +//! Measures end-to-end resolution time through each implementation. +//! +//! Fairness: +//! - Both reuse a single TLS connection (Numa via persistent server, +//! Hickory via a shared resolver instance with cache_size=0). +//! - Measurement order is alternated each round to cancel order bias. +//! - Numa cache is flushed before each query. +//! - 100 domains × 10 rounds for statistical confidence. +//! +//! Setup: +//! 1. Start a bench Numa instance: +//! cargo run -- benches/numa-bench.toml +//! 2. Run: +//! cargo bench --bench recursive_compare + +use std::net::SocketAddr; +use std::time::{Duration, Instant}; + +const DOH_UPSTREAM: &str = "https://9.9.9.9/dns-query"; +const NUMA_BENCH: &str = "127.0.0.1:5454"; +const NUMA_API: u16 = 5381; + +const DOMAINS: &[&str] = &[ + "example.com", + "rust-lang.org", + "kernel.org", + "signal.org", + "archlinux.org", + "openbsd.org", + "git-scm.com", + "sqlite.org", + "wireguard.com", + "mozilla.org", + "cloudflare.com", + "google.com", + "github.com", + "stackoverflow.com", + "wikipedia.org", + "reddit.com", + "amazon.com", + "apple.com", + "microsoft.com", + "facebook.com", + "twitter.com", + "linkedin.com", + "netflix.com", + "spotify.com", + "discord.com", + "twitch.tv", + "youtube.com", + "instagram.com", + "whatsapp.com", + "telegram.org", + "debian.org", + "ubuntu.com", + "fedoraproject.org", + "nixos.org", + "gentoo.org", + "freebsd.org", + "netbsd.org", + "dragonflybsd.org", + "illumos.org", + "haiku-os.org", + "python.org", + "golang.org", + "nodejs.org", + "ruby-lang.org", + "php.net", + "swift.org", + "kotlinlang.org", + "scala-lang.org", + "haskell.org", + "elixir-lang.org", + "erlang.org", + "clojure.org", + "julialang.org", + "ziglang.org", + "nim-lang.org", + "dlang.org", + "vlang.io", + "crystal-lang.org", + "racket-lang.org", + "ocaml.org", + "crates.io", + "npmjs.com", + "pypi.org", + "rubygems.org", + "packagist.org", + "nuget.org", + "maven.apache.org", + "hex.pm", + "hackage.haskell.org", + "pkg.go.dev", + "docker.com", + "kubernetes.io", + "prometheus.io", + "grafana.com", + "elastic.co", + "datadog.com", + "sentry.io", + "pagerduty.com", + "atlassian.com", + "jetbrains.com", + "gitlab.com", + "bitbucket.org", + "sourcehut.org", + "codeberg.org", + "launchpad.net", + "savannah.gnu.org", + "letsencrypt.org", + "eff.org", + "torproject.org", + "privacyguides.org", + "matrix.org", + "element.io", + "jitsi.org", + "nextcloud.com", + "syncthing.net", + "tailscale.com", + "mullvad.net", + "proton.me", + "duckduckgo.com", + "brave.com", + "vivaldi.com", +]; + +const ROUNDS: usize = 10; + +fn main() { + let diag = std::env::args().any(|a| a == "--diag"); + let direct = std::env::args().any(|a| a == "--direct"); + + let rt = tokio::runtime::Runtime::new().unwrap(); + + if diag { + run_diag(&rt); + return; + } + + if direct { + run_direct(&rt); + return; + } + + if std::env::args().any(|a| a == "--diag-clients") { + run_diag_clients(&rt); + return; + } + + if std::env::args().any(|a| a == "--spike-trace") { + run_spike_trace(&rt); + return; + } + + if std::env::args().any(|a| a == "--spike-phases") { + run_spike_phases(&rt); + return; + } + + if std::env::args().any(|a| a == "--spike-heartbeat") { + run_spike_heartbeat(&rt); + return; + } + + if std::env::args().any(|a| a == "--hedge") { + run_hedge(&rt); + return; + } + + if std::env::args().any(|a| a == "--hedge-5x") { + run_hedge_multi(&rt, 5); + return; + } + + if std::env::args().any(|a| a == "--vs-dnscrypt") { + run_vs_dnscrypt(&rt, 5); + return; + } + + if std::env::args().any(|a| a == "--vs-unbound") { + run_vs_unbound(&rt, 5); + return; + } + + let numa_addr: SocketAddr = NUMA_BENCH.parse().unwrap(); + + println!("DoH Forwarding Benchmark: Numa vs hickory-resolver"); + println!("Both forwarding to {DOH_UPSTREAM}"); + println!("{} domains × {ROUNDS} rounds", DOMAINS.len()); + println!(); + + // Verify bench Numa is reachable + if rt.block_on(query_udp(numa_addr, "example.com")).is_none() { + eprintln!("Bench Numa not responding on {numa_addr}"); + eprintln!(); + eprintln!("Start it with:"); + eprintln!(" cargo run -- benches/numa-bench.toml"); + std::process::exit(1); + } + + // Build a shared Hickory resolver (reuses TLS connection, like Numa does) + let resolver = rt.block_on(build_hickory_resolver()); + + // Warm up both paths (TLS handshake, connection establishment) + println!("Warming up connections..."); + for _ in 0..3 { + rt.block_on(query_udp(numa_addr, "example.com")); + rt.block_on(query_hickory_doh(&resolver, "example.com")); + } + flush_cache(); + + println!( + "{:<30} {:>10} {:>10} {:>10} {:>8} {:>8}", + "Domain", "Numa (ms)", "Hickory", "Delta", "σ Numa", "σ Hick" + ); + println!("{}", "-".repeat(92)); + + let mut numa_all = Vec::new(); + let mut hickory_all = Vec::new(); + let mut per_domain: Vec<(&str, f64, f64, f64, f64, f64)> = Vec::new(); + + for domain in DOMAINS { + let mut numa_times = Vec::with_capacity(ROUNDS); + let mut hickory_times = Vec::with_capacity(ROUNDS); + + for round in 0..ROUNDS { + flush_cache(); + std::thread::sleep(Duration::from_millis(10)); + + // Alternate measurement order each round to cancel systematic bias + if round % 2 == 0 { + // Numa first + let t = measure(&rt, || rt.block_on(query_udp(numa_addr, domain))); + numa_times.push(t); + let t = measure(&rt, || rt.block_on(query_hickory_doh(&resolver, domain))); + hickory_times.push(t); + } else { + // Hickory first + let t = measure(&rt, || rt.block_on(query_hickory_doh(&resolver, domain))); + hickory_times.push(t); + flush_cache(); + std::thread::sleep(Duration::from_millis(10)); + let t = measure(&rt, || rt.block_on(query_udp(numa_addr, domain))); + numa_times.push(t); + } + } + + let numa_avg = mean(&numa_times); + let hickory_avg = mean(&hickory_times); + let numa_sd = stddev(&numa_times); + let hickory_sd = stddev(&hickory_times); + let delta = numa_avg - hickory_avg; + + numa_all.extend_from_slice(&numa_times); + hickory_all.extend_from_slice(&hickory_times); + per_domain.push((domain, numa_avg, hickory_avg, delta, numa_sd, hickory_sd)); + + let delta_str = format_delta(delta); + println!( + "{:<30} {:>7.1} ms {:>7.1} ms {:>7} ms {:>5.1}ms {:>5.1}ms", + domain, numa_avg, hickory_avg, delta_str, numa_sd, hickory_sd + ); + } + + println!("{}", "-".repeat(92)); + + let numa_mean = mean(&numa_all); + let hickory_mean = mean(&hickory_all); + let delta_mean = numa_mean - hickory_mean; + + println!( + "{:<30} {:>7.1} ms {:>7.1} ms {:>7} ms {:>5.1}ms {:>5.1}ms", + "OVERALL MEAN", + numa_mean, + hickory_mean, + format_delta(delta_mean), + stddev(&numa_all), + stddev(&hickory_all), + ); + + // Median + let numa_med = median(&mut numa_all); + let hickory_med = median(&mut hickory_all); + println!( + "{:<30} {:>7.1} ms {:>7.1} ms {:>7} ms", + "MEDIAN", + numa_med, + hickory_med, + format_delta(numa_med - hickory_med), + ); + + // P95 + let numa_p95 = percentile(&numa_all, 95.0); + let hickory_p95 = percentile(&hickory_all, 95.0); + println!( + "{:<30} {:>7.1} ms {:>7.1} ms {:>7} ms", + "P95", + numa_p95, + hickory_p95, + format_delta(numa_p95 - hickory_p95), + ); + + println!(); + let total_queries = DOMAINS.len() * ROUNDS; + if numa_mean < hickory_mean { + let pct = ((hickory_mean - numa_mean) / hickory_mean * 100.0).round(); + println!("Numa is ~{pct}% faster (mean over {total_queries} queries)."); + } else if hickory_mean < numa_mean { + let pct = ((numa_mean - hickory_mean) / numa_mean * 100.0).round(); + println!("Hickory is ~{pct}% faster (mean over {total_queries} queries)."); + } else { + println!("Both are equal (mean over {total_queries} queries)."); + } + + println!(); + println!("Methodology:"); + println!(" - Both forward to {DOH_UPSTREAM} over a reused TLS connection."); + println!(" - Numa cache flushed before each query. Hickory cache disabled."); + println!(" - Measurement order alternates each round to cancel order bias."); + println!(" - {} domains × {ROUNDS} rounds = {total_queries} queries per resolver.", DOMAINS.len()); +} + +fn run_diag(rt: &tokio::runtime::Runtime) { + println!("Hickory connection reuse diagnostic"); + println!("20 sequential queries to {DOH_UPSTREAM} via one shared resolver"); + println!("If conn is reused: query 1 slow (TLS handshake), rest fast.\n"); + + let resolver = rt.block_on(build_hickory_resolver()); + + let domains = [ + "example.com", "rust-lang.org", "kernel.org", "google.com", "github.com", + "example.com", "rust-lang.org", "kernel.org", "google.com", "github.com", + "example.com", "rust-lang.org", "kernel.org", "google.com", "github.com", + "example.com", "rust-lang.org", "kernel.org", "google.com", "github.com", + ]; + + println!("{:>3} {:<20} {:>10}", "#", "Domain", "Time (ms)"); + println!("{}", "-".repeat(40)); + + for (i, domain) in domains.iter().enumerate() { + use hickory_resolver::proto::rr::RecordType; + let start = Instant::now(); + let result = rt.block_on(resolver.lookup(*domain, RecordType::A)); + let ms = start.elapsed().as_secs_f64() * 1000.0; + match &result { + Ok(lookup) => { + let first = lookup.iter().next().map(|r| format!("{r}")).unwrap_or_default(); + println!("{:>3} {:<20} {:>7.1} ms OK {}", i + 1, domain, ms, first); + } + Err(e) => { + println!("{:>3} {:<20} {:>7.1} ms ERR {}", i + 1, domain, ms, e); + } + } + } +} + +/// Library-to-library comparison: Numa's forward_query_raw vs Hickory's resolver.lookup(). +/// No UDP, no server pipeline — just the DoH forwarding call. +fn run_direct(rt: &tokio::runtime::Runtime) { + println!("Direct DoH Forwarding: Numa forward_query_raw vs Hickory resolver.lookup()"); + println!("Both forwarding to {DOH_UPSTREAM} — no UDP, no server pipeline"); + println!("{} domains × {ROUNDS} rounds", DOMAINS.len()); + println!(); + + // Build Numa's upstream (shared reqwest client, reuses HTTP/2 connection) + let numa_upstream = + numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse upstream"); + let timeout = Duration::from_secs(10); + + // Build Hickory's resolver (shared, reuses HTTP/2 connection) + let resolver = rt.block_on(build_hickory_resolver()); + + // Warm up both + println!("Warming up connections..."); + for _ in 0..3 { + let wire = build_query_vec("example.com"); + let _ = rt.block_on(numa::forward::forward_query_raw(&wire, &numa_upstream, timeout)); + let _ = rt.block_on(query_hickory_doh(&resolver, "example.com")); + } + + println!( + "{:<30} {:>10} {:>10} {:>10} {:>8} {:>8}", + "Domain", "Numa (ms)", "Hickory", "Delta", "σ Numa", "σ Hick" + ); + println!("{}", "-".repeat(92)); + + let mut numa_all = Vec::new(); + let mut hickory_all = Vec::new(); + + for domain in DOMAINS { + let mut numa_times = Vec::with_capacity(ROUNDS); + let mut hickory_times = Vec::with_capacity(ROUNDS); + + for round in 0..ROUNDS { + let wire = build_query_vec(domain); + + if round % 2 == 0 { + let w = wire.clone(); + let t = measure(rt, || { + rt.block_on(numa::forward::forward_query_raw(&w, &numa_upstream, timeout)) + }); + numa_times.push(t); + let t = measure(rt, || rt.block_on(query_hickory_doh(&resolver, domain))); + hickory_times.push(t); + } else { + let t = measure(rt, || rt.block_on(query_hickory_doh(&resolver, domain))); + hickory_times.push(t); + let w = wire.clone(); + let t = measure(rt, || { + rt.block_on(numa::forward::forward_query_raw(&w, &numa_upstream, timeout)) + }); + numa_times.push(t); + } + } + + let numa_avg = mean(&numa_times); + let hickory_avg = mean(&hickory_times); + let numa_sd = stddev(&numa_times); + let hickory_sd = stddev(&hickory_times); + let delta = numa_avg - hickory_avg; + + numa_all.extend_from_slice(&numa_times); + hickory_all.extend_from_slice(&hickory_times); + + println!( + "{:<30} {:>7.1} ms {:>7.1} ms {:>7} ms {:>5.1}ms {:>5.1}ms", + domain, numa_avg, hickory_avg, format_delta(delta), numa_sd, hickory_sd + ); + } + + println!("{}", "-".repeat(92)); + let numa_mean = mean(&numa_all); + let hickory_mean = mean(&hickory_all); + println!( + "{:<30} {:>7.1} ms {:>7.1} ms {:>7} ms {:>5.1}ms {:>5.1}ms", + "OVERALL MEAN", numa_mean, hickory_mean, format_delta(numa_mean - hickory_mean), + stddev(&numa_all), stddev(&hickory_all), + ); + let numa_med = median(&mut numa_all); + let hickory_med = median(&mut hickory_all); + println!( + "{:<30} {:>7.1} ms {:>7.1} ms {:>7} ms", + "MEDIAN", numa_med, hickory_med, format_delta(numa_med - hickory_med), + ); + let numa_p95 = percentile(&numa_all, 95.0); + let hickory_p95 = percentile(&hickory_all, 95.0); + println!( + "{:<30} {:>7.1} ms {:>7.1} ms {:>7} ms", + "P95", numa_p95, hickory_p95, format_delta(numa_p95 - hickory_p95), + ); + + println!(); + let total_queries = DOMAINS.len() * ROUNDS; + if numa_mean < hickory_mean { + let pct = ((hickory_mean - numa_mean) / hickory_mean * 100.0).round(); + println!("Numa is ~{pct}% faster (mean over {total_queries} queries)."); + } else if hickory_mean < numa_mean { + let pct = ((numa_mean - hickory_mean) / numa_mean * 100.0).round(); + println!("Hickory is ~{pct}% faster (mean over {total_queries} queries)."); + } else { + println!("Both are equal (mean over {total_queries} queries)."); + } + + println!(); + println!("Methodology:"); + println!(" - Both forward to {DOH_UPSTREAM} over a reused TLS/HTTP2 connection."); + println!(" - No UDP, no server pipeline, no cache — pure DoH forwarding."); + println!(" - Numa: forward_query_raw (reqwest). Hickory: resolver.lookup (h2)."); + println!(" - {} domains × {ROUNDS} rounds = {total_queries} queries per implementation.", DOMAINS.len()); +} + +/// Per-query timing diagnostic: 20 queries each through reqwest and Hickory. +/// Shows whether reqwest has connection reuse issues or per-request overhead. +fn run_diag_clients(rt: &tokio::runtime::Runtime) { + println!("Client diagnostic: reqwest vs Hickory per-query timing"); + println!("20 queries each to {DOH_UPSTREAM}\n"); + + let upstream = + numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse upstream"); + let resolver = rt.block_on(build_hickory_resolver()); + let timeout = Duration::from_secs(10); + + // Warm both + for _ in 0..3 { + let w = build_query_vec("example.com"); + let _ = rt.block_on(numa::forward::forward_query_raw(&w, &upstream, timeout)); + let _ = rt.block_on(query_hickory_doh(&resolver, "example.com")); + } + + let domains = [ + "example.com", "google.com", "github.com", "rust-lang.org", "cloudflare.com", + "example.com", "google.com", "github.com", "rust-lang.org", "cloudflare.com", + "example.com", "google.com", "github.com", "rust-lang.org", "cloudflare.com", + "example.com", "google.com", "github.com", "rust-lang.org", "cloudflare.com", + ]; + + println!("{:>3} {:<20} {:>12} {:>12}", "#", "Domain", "reqwest", "Hickory"); + println!("{}", "-".repeat(55)); + + for (i, domain) in domains.iter().enumerate() { + let wire = build_query_vec(domain); + + let start = Instant::now(); + let r_result = rt.block_on(numa::forward::forward_query_raw(&wire, &upstream, timeout)); + let r_ms = start.elapsed().as_secs_f64() * 1000.0; + let r_ok = if r_result.is_ok() { "OK" } else { "FAIL" }; + + let start = Instant::now(); + let h_result = rt.block_on(query_hickory_doh(&resolver, domain)); + let h_ms = start.elapsed().as_secs_f64() * 1000.0; + let h_ok = if h_result.is_some() { "OK" } else { "FAIL" }; + + println!( + "{:>3} {:<20} {:>7.1} ms {} {:>7.1} ms {}", + i + 1, domain, r_ms, r_ok, h_ms, h_ok + ); + } +} + +/// Spike trace: fire 200 sequential queries through reqwest and log every one +/// with a timestamp. Analyze the distribution and find spike clusters. +fn run_spike_trace(rt: &tokio::runtime::Runtime) { + println!("Spike trace: 200 sequential reqwest DoH queries"); + println!("Target: {DOH_UPSTREAM}\n"); + + let upstream = + numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse upstream"); + let timeout = Duration::from_secs(10); + + // Warm + for _ in 0..5 { + let w = build_query_vec("example.com"); + let _ = rt.block_on(numa::forward::forward_query_raw(&w, &upstream, timeout)); + } + + // Run the entire 200-query loop inside ONE block_on to eliminate + // per-query runtime re-entry overhead. + let samples: Vec<(u128, f64)> = rt.block_on(async { + let test_start = Instant::now(); + let mut s = Vec::with_capacity(200); + for i in 0..200 { + let domain = match i % 5 { + 0 => "example.com", + 1 => "google.com", + 2 => "github.com", + 3 => "rust-lang.org", + _ => "cloudflare.com", + }; + let wire = build_query_vec(domain); + let req_start = Instant::now(); + let t_from_start_us = test_start.elapsed().as_micros(); + let _ = numa::forward::forward_query_raw(&wire, &upstream, timeout).await; + let ms = req_start.elapsed().as_secs_f64() * 1000.0; + s.push((t_from_start_us, ms)); + } + s + }); + + // Compute stats + let mut sorted_times: Vec = samples.iter().map(|(_, t)| *t).collect(); + sorted_times.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let n = sorted_times.len(); + let median = sorted_times[n / 2]; + let p90 = sorted_times[(n * 90) / 100]; + let p95 = sorted_times[(n * 95) / 100]; + let p99 = sorted_times[(n * 99) / 100]; + let max = sorted_times[n - 1]; + let mean: f64 = sorted_times.iter().sum::() / n as f64; + + println!("Distribution (n={}):", n); + println!(" mean: {:.1} ms", mean); + println!(" median: {:.1} ms", median); + println!(" p90: {:.1} ms", p90); + println!(" p95: {:.1} ms", p95); + println!(" p99: {:.1} ms", p99); + println!(" max: {:.1} ms", max); + println!(); + + // Define spike threshold as 3x median + let spike_threshold = median * 3.0; + let spikes: Vec<(usize, u128, f64)> = samples + .iter() + .enumerate() + .filter(|(_, (_, t))| *t > spike_threshold) + .map(|(i, (ts, t))| (i, *ts, *t)) + .collect(); + + println!("Spikes (> {:.1}ms, which is 3x median):", spike_threshold); + println!(" count: {}", spikes.len()); + if spikes.is_empty() { + return; + } + + // Inter-spike gaps (time between spikes) + let mut gaps_ms: Vec = Vec::new(); + for w in spikes.windows(2) { + let gap_us = w[1].1 - w[0].1; + gaps_ms.push(gap_us as f64 / 1000.0); + } + + println!(); + println!(" {:>4} {:>12} {:>10} {:>12}", "idx", "at (ms)", "latency", "gap from prev"); + for (i, ((idx, ts, latency), gap)) in spikes.iter().zip( + std::iter::once(&0.0).chain(gaps_ms.iter()) + ).enumerate() { + let _ = i; + let gap_str = if *gap > 0.0 { + format!("{:.0} ms", gap) + } else { + "-".to_string() + }; + println!(" {:>4} {:>9.1} {:>6.1} ms {:>12}", idx, *ts as f64 / 1000.0, latency, gap_str); + } + + if !gaps_ms.is_empty() { + let gap_mean: f64 = gaps_ms.iter().sum::() / gaps_ms.len() as f64; + let mut gap_sorted = gaps_ms.clone(); + gap_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let gap_median = gap_sorted[gap_sorted.len() / 2]; + println!(); + println!(" Inter-spike gap: mean={:.0}ms, median={:.0}ms", gap_mean, gap_median); + } +} + +/// Spike phases: time each step of the reqwest DoH call to find which phase +/// is slow during a spike. Reports (build+send, send->resp headers, body read). +fn run_spike_phases(rt: &tokio::runtime::Runtime) { + println!("Spike phases: timing each phase of reqwest DoH call"); + println!("Target: {DOH_UPSTREAM}\n"); + + // Build the same tuned client our forward_doh uses + let client = reqwest::Client::builder() + .use_rustls_tls() + .http2_initial_stream_window_size(65_535) + .http2_initial_connection_window_size(65_535) + .http2_keep_alive_interval(Duration::from_secs(15)) + .http2_keep_alive_while_idle(true) + .http2_keep_alive_timeout(Duration::from_secs(10)) + .pool_idle_timeout(Duration::from_secs(300)) + .pool_max_idle_per_host(1) + .build() + .unwrap(); + + // Warm up + for _ in 0..5 { + let wire = build_query_vec("example.com"); + let _ = rt.block_on(async { + client + .post(DOH_UPSTREAM) + .header("content-type", "application/dns-message") + .header("accept", "application/dns-message") + .body(wire) + .send() + .await + .ok()? + .bytes() + .await + .ok() + }); + } + + println!("{:>4} {:>8} {:>8} {:>8} {:>8}", "idx", "total", "build", "send", "body"); + println!("{}", "-".repeat(50)); + + let samples: Vec<(f64, f64, f64, f64)> = rt.block_on(async { + let mut s = Vec::with_capacity(200); + for i in 0..200 { + let domain = match i % 5 { + 0 => "example.com", + 1 => "google.com", + 2 => "github.com", + 3 => "rust-lang.org", + _ => "cloudflare.com", + }; + let wire = build_query_vec(domain); + + let t0 = Instant::now(); + // Phase 1: build the request + let req = client + .post(DOH_UPSTREAM) + .header("content-type", "application/dns-message") + .header("accept", "application/dns-message") + .body(wire); + let t1 = Instant::now(); + // Phase 2: send() — this is the dispatch channel + round trip to headers + let resp_result = req.send().await; + let t2 = Instant::now(); + // Phase 3: read body + let body_result = match resp_result { + Ok(r) => r.bytes().await.ok().map(|b| b.len()), + Err(_) => None, + }; + let t3 = Instant::now(); + + let build_ms = (t1 - t0).as_secs_f64() * 1000.0; + let send_ms = (t2 - t1).as_secs_f64() * 1000.0; + let body_ms = (t3 - t2).as_secs_f64() * 1000.0; + let total_ms = (t3 - t0).as_secs_f64() * 1000.0; + + s.push((total_ms, build_ms, send_ms, body_ms)); + let _ = body_result; + } + s + }); + + // Compute distribution on total + let mut totals: Vec = samples.iter().map(|s| s.0).collect(); + totals.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let median = totals[100]; + + // Print spikes (> 3x median) with phase breakdown + for (i, (total, build, send, body)) in samples.iter().enumerate() { + if *total > median * 3.0 { + println!( + "{:>4} {:>5.1} ms {:>5.1} ms {:>5.1} ms {:>5.1} ms", + i, total, build, send, body + ); + } + } + + // Summary: mean of each phase for spikes vs non-spikes + let (spike_samples, normal_samples): (Vec<_>, Vec<_>) = samples + .iter() + .partition(|(t, _, _, _)| *t > median * 3.0); + + let phase_means = |samples: &[&(f64, f64, f64, f64)]| -> (f64, f64, f64, f64) { + let n = samples.len() as f64; + if n == 0.0 { return (0.0, 0.0, 0.0, 0.0); } + let total: f64 = samples.iter().map(|s| s.0).sum::() / n; + let build: f64 = samples.iter().map(|s| s.1).sum::() / n; + let send: f64 = samples.iter().map(|s| s.2).sum::() / n; + let body: f64 = samples.iter().map(|s| s.3).sum::() / n; + (total, build, send, body) + }; + + let spike_refs: Vec<&(f64, f64, f64, f64)> = spike_samples.iter().copied().collect(); + let normal_refs: Vec<&(f64, f64, f64, f64)> = normal_samples.iter().copied().collect(); + let (s_total, s_build, s_send, s_body) = phase_means(&spike_refs); + let (n_total, n_build, n_send, n_body) = phase_means(&normal_refs); + + println!(); + println!("Summary (mean ms):"); + println!( + " {:<8} {:>8} {:>8} {:>8} {:>8}", + "", "total", "build", "send", "body" + ); + println!( + " {:<8} {:>5.1} ms {:>5.1} ms {:>5.1} ms {:>5.1} ms (n={})", + "normal", n_total, n_build, n_send, n_body, normal_refs.len() + ); + println!( + " {:<8} {:>5.1} ms {:>5.1} ms {:>5.1} ms {:>5.1} ms (n={})", + "spike", s_total, s_build, s_send, s_body, spike_refs.len() + ); + println!(); + println!("Delta (spike - normal):"); + println!( + " build: {:+.1} ms, send: {:+.1} ms, body: {:+.1} ms", + s_build - n_build, + s_send - n_send, + s_body - n_body + ); +} + +/// Heartbeat probe: run a parallel task that ticks every 5ms and records +/// how long each tick actually takes. If the heartbeat stalls during a DoH +/// spike, it's a tokio scheduling issue (runtime can't poll tasks). If +/// heartbeat is fine while send() is stuck, it's internal to hyper/h2. +fn run_spike_heartbeat(rt: &tokio::runtime::Runtime) { + use std::sync::{Arc, Mutex}; + + println!("Spike heartbeat probe"); + println!("Running DoH queries + parallel 5ms heartbeat task\n"); + + let upstream = + numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse upstream"); + let timeout = Duration::from_secs(10); + + // Warm up + for _ in 0..5 { + let w = build_query_vec("example.com"); + let _ = rt.block_on(numa::forward::forward_query_raw(&w, &upstream, timeout)); + } + + // Shared vecs: (relative_ms_from_start, event_kind, latency_ms) + // event_kind: 0 = heartbeat, 1 = doh query + type EventLog = Vec<(f64, u8, f64)>; + let events: Arc> = Arc::new(Mutex::new(Vec::with_capacity(2000))); + let stop = Arc::new(std::sync::atomic::AtomicBool::new(false)); + + let test_start = Instant::now(); + + rt.block_on(async { + // Spawn heartbeat task + let hb_events = Arc::clone(&events); + let hb_stop = Arc::clone(&stop); + let hb_start = test_start; + let heartbeat = tokio::spawn(async move { + let mut next_tick = Instant::now(); + let target = Duration::from_millis(5); + while !hb_stop.load(std::sync::atomic::Ordering::Relaxed) { + next_tick += target; + // Sleep until the next scheduled tick + let now = Instant::now(); + if next_tick > now { + tokio::time::sleep(next_tick - now).await; + } + // Measure how much we overshot the scheduled tick + let actual = Instant::now(); + let lag_ms = if actual > next_tick { + (actual - next_tick).as_secs_f64() * 1000.0 + } else { + 0.0 + }; + let t = (actual - hb_start).as_secs_f64() * 1000.0; + if let Ok(mut e) = hb_events.lock() { + e.push((t, 0, lag_ms)); + } + } + }); + + // Run 200 DoH queries and record their timings + for i in 0..200 { + let domain = match i % 5 { + 0 => "example.com", + 1 => "google.com", + 2 => "github.com", + 3 => "rust-lang.org", + _ => "cloudflare.com", + }; + let wire = build_query_vec(domain); + let req_start = Instant::now(); + let _ = numa::forward::forward_query_raw(&wire, &upstream, timeout).await; + let elapsed = req_start.elapsed().as_secs_f64() * 1000.0; + let t = (req_start - test_start).as_secs_f64() * 1000.0; + if let Ok(mut e) = events.lock() { + e.push((t, 1, elapsed)); + } + } + + stop.store(true, std::sync::atomic::Ordering::Relaxed); + let _ = heartbeat.await; + }); + + let events = events.lock().unwrap(); + + // Separate heartbeats and doh events + let hb: Vec<(f64, f64)> = events + .iter() + .filter(|(_, k, _)| *k == 0) + .map(|(t, _, l)| (*t, *l)) + .collect(); + let doh: Vec<(f64, f64)> = events + .iter() + .filter(|(_, k, _)| *k == 1) + .map(|(t, _, l)| (*t, *l)) + .collect(); + + // Heartbeat stats + let mut hb_lags: Vec = hb.iter().map(|(_, l)| *l).collect(); + hb_lags.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let hb_n = hb_lags.len(); + let hb_median = hb_lags[hb_n / 2]; + let hb_p95 = hb_lags[(hb_n * 95) / 100]; + let hb_p99 = hb_lags[(hb_n * 99) / 100]; + let hb_max = hb_lags[hb_n - 1]; + + // DoH stats + let mut doh_latencies: Vec = doh.iter().map(|(_, l)| *l).collect(); + doh_latencies.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let doh_n = doh_latencies.len(); + let doh_median = doh_latencies[doh_n / 2]; + let doh_p95 = doh_latencies[(doh_n * 95) / 100]; + let doh_max = doh_latencies[doh_n - 1]; + + println!("Heartbeat lag (tick overshoot, {}ms target):", 5); + println!(" n: {}", hb_n); + println!(" median: {:.2} ms", hb_median); + println!(" p95: {:.2} ms", hb_p95); + println!(" p99: {:.2} ms", hb_p99); + println!(" max: {:.2} ms", hb_max); + println!(); + println!("DoH latency:"); + println!(" n: {}", doh_n); + println!(" median: {:.1} ms", doh_median); + println!(" p95: {:.1} ms", doh_p95); + println!(" max: {:.1} ms", doh_max); + println!(); + + // Find DoH spikes and check heartbeat activity DURING each spike + let doh_spike_threshold = doh_median * 3.0; + let mut spikes_with_hb_lag = 0; + let mut spikes_total = 0; + let mut max_hb_during_any_spike = 0.0_f64; + + println!( + "Correlation: during each DoH spike (>{:.1}ms), max heartbeat lag:", + doh_spike_threshold + ); + println!(" {:>6} {:>10} {:>18}", "doh_at", "doh_ms", "max_hb_lag_during"); + + for (doh_t, doh_ms) in &doh { + if *doh_ms > doh_spike_threshold { + spikes_total += 1; + // Find heartbeats that happened during this DoH query + let spike_start = *doh_t; + let spike_end = spike_start + *doh_ms; + let mut max_hb = 0.0_f64; + for (hb_t, hb_lag) in &hb { + if *hb_t >= spike_start && *hb_t <= spike_end + 20.0 { + if *hb_lag > max_hb { + max_hb = *hb_lag; + } + } + } + if max_hb > 5.0 { + spikes_with_hb_lag += 1; + } + max_hb_during_any_spike = max_hb_during_any_spike.max(max_hb); + println!( + " {:>5.0} ms {:>7.1} ms {:>14.2} ms", + doh_t, doh_ms, max_hb + ); + } + } + + println!(); + println!("Conclusion:"); + if spikes_total == 0 { + println!(" No DoH spikes in this run."); + } else { + let pct = (spikes_with_hb_lag as f64 / spikes_total as f64 * 100.0).round(); + println!( + " {}/{} spikes ({:.0}%) had concurrent heartbeat lag >5ms.", + spikes_with_hb_lag, spikes_total, pct + ); + println!(" Max heartbeat lag during any spike: {:.2}ms", max_hb_during_any_spike); + println!(); + if max_hb_during_any_spike > 20.0 { + println!(" → Heartbeat stalls during DoH spikes: tokio scheduling / OS thread issue."); + println!(" The runtime can't poll ANY task — likely QoS demotion, GC pause,"); + println!(" or the worker thread is blocked somewhere."); + } else { + println!(" → Heartbeat runs normally during DoH spikes: internal to hyper/h2."); + println!(" The runtime is fine, but send()'s await is stuck waiting for"); + println!(" the ClientTask to poll the dispatch channel."); + } + } +} + +/// Hedging benchmark: tests four configurations against Hickory. +/// Single: 1 client → Quad9 (baseline) +/// Hedge-same: hedge against same client/connection → Quad9 +/// Hedge-dual: hedge against 2 separate clients, both → Quad9 (same upstream, 2 HTTP/2 conns) +/// Hickory: Hickory resolver → Quad9 (reference) +fn run_hedge(rt: &tokio::runtime::Runtime) { + let hedge_delay = Duration::from_millis(10); + + println!("Hedging Benchmark (all paths → Quad9 only)"); + println!("Upstream: {}", DOH_UPSTREAM); + println!("Hedge delay: {:?}", hedge_delay); + println!("{} domains × {} rounds\n", DOMAINS.len(), ROUNDS); + + // Primary and secondary: two separate reqwest clients → same Quad9 URL. + // This gives two independent HTTP/2 connections, so dispatch spikes + // are uncorrelated (at most one stalls at a time). + let primary_same = + numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse primary"); + let primary_dual = + numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse primary_dual"); + let secondary_dual = + numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse secondary_dual"); + let timeout = Duration::from_secs(10); + + let resolver = rt.block_on(build_hickory_resolver()); + + // Warm up all paths (separate connections need their own TLS handshake) + println!("Warming up connections..."); + for _ in 0..5 { + let w = build_query_vec("example.com"); + let _ = rt.block_on(numa::forward::forward_query_raw(&w, &primary_same, timeout)); + let _ = rt.block_on(numa::forward::forward_query_raw(&w, &primary_dual, timeout)); + let _ = rt.block_on(numa::forward::forward_query_raw(&w, &secondary_dual, timeout)); + let _ = rt.block_on(query_hickory_doh(&resolver, "example.com")); + } + + let mut single_all = Vec::new(); + let mut hedge_same_all = Vec::new(); + let mut hedge_dual_all = Vec::new(); + let mut hickory_all = Vec::new(); + + println!( + "{:<24} {:>10} {:>10} {:>10} {:>10}", + "Domain", "Single", "Hedge-same", "Hedge-dual", "Hickory" + ); + println!("{}", "-".repeat(78)); + + for domain in DOMAINS { + let mut single_times = Vec::with_capacity(ROUNDS); + let mut hedge_same_times = Vec::with_capacity(ROUNDS); + let mut hedge_dual_times = Vec::with_capacity(ROUNDS); + let mut hickory_times = Vec::with_capacity(ROUNDS); + + for _ in 0..ROUNDS { + let wire = build_query_vec(domain); + + let t = Instant::now(); + let _ = rt.block_on(numa::forward::forward_query_raw(&wire, &primary_same, timeout)); + single_times.push(t.elapsed().as_secs_f64() * 1000.0); + + let t = Instant::now(); + let _ = rt.block_on(numa::forward::forward_with_hedging_raw( + &wire, &primary_same, &primary_same, hedge_delay, timeout, + )); + hedge_same_times.push(t.elapsed().as_secs_f64() * 1000.0); + + let t = Instant::now(); + let _ = rt.block_on(numa::forward::forward_with_hedging_raw( + &wire, &primary_dual, &secondary_dual, hedge_delay, timeout, + )); + hedge_dual_times.push(t.elapsed().as_secs_f64() * 1000.0); + + let t = Instant::now(); + let _ = rt.block_on(query_hickory_doh(&resolver, domain)); + hickory_times.push(t.elapsed().as_secs_f64() * 1000.0); + } + + single_all.extend_from_slice(&single_times); + hedge_same_all.extend_from_slice(&hedge_same_times); + hedge_dual_all.extend_from_slice(&hedge_dual_times); + hickory_all.extend_from_slice(&hickory_times); + + println!( + "{:<24} {:>7.1} ms {:>7.1} ms {:>7.1} ms {:>7.1} ms", + domain, + mean(&single_times), + mean(&hedge_same_times), + mean(&hedge_dual_times), + mean(&hickory_times) + ); + } + + println!("{}", "-".repeat(78)); + + let stats = |all: &mut Vec| -> (f64, f64, f64, f64, f64) { + let m = mean(all); + let med = median(all); + let p95 = percentile(all, 95.0); + let p99 = percentile(all, 99.0); + let sd = stddev(all); + (m, med, p95, p99, sd) + }; + + let (s_m, s_med, s_p95, s_p99, s_sd) = stats(&mut single_all); + let (hs_m, hs_med, hs_p95, hs_p99, hs_sd) = stats(&mut hedge_same_all); + let (hd_m, hd_med, hd_p95, hd_p99, hd_sd) = stats(&mut hedge_dual_all); + let (k_m, k_med, k_p95, k_p99, k_sd) = stats(&mut hickory_all); + + println!(); + println!( + "{:<10} {:>10} {:>10} {:>10} {:>10}", + "", "Single", "Hedge-same", "Hedge-dual", "Hickory" + ); + println!( + "{:<10} {:>7.1} ms {:>7.1} ms {:>7.1} ms {:>7.1} ms", + "mean", s_m, hs_m, hd_m, k_m + ); + println!( + "{:<10} {:>7.1} ms {:>7.1} ms {:>7.1} ms {:>7.1} ms", + "median", s_med, hs_med, hd_med, k_med + ); + println!( + "{:<10} {:>7.1} ms {:>7.1} ms {:>7.1} ms {:>7.1} ms", + "p95", s_p95, hs_p95, hd_p95, k_p95 + ); + println!( + "{:<10} {:>7.1} ms {:>7.1} ms {:>7.1} ms {:>7.1} ms", + "p99", s_p99, hs_p99, hd_p99, k_p99 + ); + println!( + "{:<10} {:>7.1} ms {:>7.1} ms {:>7.1} ms {:>7.1} ms", + "σ", s_sd, hs_sd, hd_sd, k_sd + ); + + println!(); + println!("Hedge-same improvement over single:"); + println!(" mean: {:+.0}%, p95: {:+.0}%, p99: {:+.0}%", + (hs_m - s_m) / s_m * 100.0, + (hs_p95 - s_p95) / s_p95 * 100.0, + (hs_p99 - s_p99) / s_p99 * 100.0); + println!("Hedge-dual improvement over single:"); + println!(" mean: {:+.0}%, p95: {:+.0}%, p99: {:+.0}%", + (hd_m - s_m) / s_m * 100.0, + (hd_p95 - s_p95) / s_p95 * 100.0, + (hd_p99 - s_p99) / s_p99 * 100.0); +} + +/// Run the hedging benchmark N times and aggregate samples across all runs. +/// Also reports per-run stats to show drift. +fn run_hedge_multi(rt: &tokio::runtime::Runtime, iterations: usize) { + let hedge_delay = Duration::from_millis(10); + + println!("Hedging Benchmark × {} iterations", iterations); + println!("Upstream: {}", DOH_UPSTREAM); + println!("Hedge delay: {:?}", hedge_delay); + println!("{} domains × {} rounds per iteration\n", DOMAINS.len(), ROUNDS); + + let primary_same = + numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse"); + let primary_dual = + numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse"); + let secondary_dual = + numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse"); + let timeout = Duration::from_secs(10); + + let resolver = rt.block_on(build_hickory_resolver()); + + // Warm up + println!("Warming up..."); + for _ in 0..5 { + let w = build_query_vec("example.com"); + let _ = rt.block_on(numa::forward::forward_query_raw(&w, &primary_same, timeout)); + let _ = rt.block_on(numa::forward::forward_query_raw(&w, &primary_dual, timeout)); + let _ = rt.block_on(numa::forward::forward_query_raw(&w, &secondary_dual, timeout)); + let _ = rt.block_on(query_hickory_doh(&resolver, "example.com")); + } + + // Accumulated samples across all iterations + let mut all_single = Vec::new(); + let mut all_hedge_same = Vec::new(); + let mut all_hedge_dual = Vec::new(); + let mut all_hickory = Vec::new(); + + // Per-iteration summary stats + let mut iter_stats: Vec<[(f64, f64, f64, f64, f64); 4]> = Vec::new(); + + for iter in 1..=iterations { + println!(" iteration {}/{}...", iter, iterations); + + let mut single = Vec::new(); + let mut hedge_same = Vec::new(); + let mut hedge_dual = Vec::new(); + let mut hickory = Vec::new(); + + for domain in DOMAINS { + for _ in 0..ROUNDS { + let wire = build_query_vec(domain); + + let t = Instant::now(); + let _ = rt.block_on(numa::forward::forward_query_raw(&wire, &primary_same, timeout)); + single.push(t.elapsed().as_secs_f64() * 1000.0); + + let t = Instant::now(); + let _ = rt.block_on(numa::forward::forward_with_hedging_raw( + &wire, &primary_same, &primary_same, hedge_delay, timeout, + )); + hedge_same.push(t.elapsed().as_secs_f64() * 1000.0); + + let t = Instant::now(); + let _ = rt.block_on(numa::forward::forward_with_hedging_raw( + &wire, &primary_dual, &secondary_dual, hedge_delay, timeout, + )); + hedge_dual.push(t.elapsed().as_secs_f64() * 1000.0); + + let t = Instant::now(); + let _ = rt.block_on(query_hickory_doh(&resolver, domain)); + hickory.push(t.elapsed().as_secs_f64() * 1000.0); + } + } + + let stats = |v: &mut Vec| -> (f64, f64, f64, f64, f64) { + (mean(v), median(v), percentile(v, 95.0), percentile(v, 99.0), stddev(v)) + }; + iter_stats.push([ + stats(&mut single), + stats(&mut hedge_same), + stats(&mut hedge_dual), + stats(&mut hickory), + ]); + + all_single.extend_from_slice(&single); + all_hedge_same.extend_from_slice(&hedge_same); + all_hedge_dual.extend_from_slice(&hedge_dual); + all_hickory.extend_from_slice(&hickory); + } + + println!(); + println!("=== Per-iteration medians (drift check) ==="); + println!( + "{:<8} {:>10} {:>12} {:>12} {:>10}", + "iter", "Single", "Hedge-same", "Hedge-dual", "Hickory" + ); + for (i, s) in iter_stats.iter().enumerate() { + println!( + "{:<8} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", + i + 1, + s[0].1, + s[1].1, + s[2].1, + s[3].1 + ); + } + + println!(); + println!("=== Per-iteration p99 (drift check) ==="); + println!( + "{:<8} {:>10} {:>12} {:>12} {:>10}", + "iter", "Single", "Hedge-same", "Hedge-dual", "Hickory" + ); + for (i, s) in iter_stats.iter().enumerate() { + println!( + "{:<8} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", + i + 1, + s[0].3, + s[1].3, + s[2].3, + s[3].3 + ); + } + + let final_stats = |v: &mut Vec| -> (f64, f64, f64, f64, f64) { + (mean(v), median(v), percentile(v, 95.0), percentile(v, 99.0), stddev(v)) + }; + let (s_m, s_med, s_p95, s_p99, s_sd) = final_stats(&mut all_single); + let (hs_m, hs_med, hs_p95, hs_p99, hs_sd) = final_stats(&mut all_hedge_same); + let (hd_m, hd_med, hd_p95, hd_p99, hd_sd) = final_stats(&mut all_hedge_dual); + let (k_m, k_med, k_p95, k_p99, k_sd) = final_stats(&mut all_hickory); + + println!(); + let total = iterations * DOMAINS.len() * ROUNDS; + println!("=== Aggregated across all {} samples per method ===", total); + println!(); + println!( + "{:<10} {:>10} {:>12} {:>12} {:>10}", + "", "Single", "Hedge-same", "Hedge-dual", "Hickory" + ); + println!( + "{:<10} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", + "mean", s_m, hs_m, hd_m, k_m + ); + println!( + "{:<10} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", + "median", s_med, hs_med, hd_med, k_med + ); + println!( + "{:<10} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", + "p95", s_p95, hs_p95, hd_p95, k_p95 + ); + println!( + "{:<10} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", + "p99", s_p99, hs_p99, hd_p99, k_p99 + ); + println!( + "{:<10} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", + "σ", s_sd, hs_sd, hd_sd, k_sd + ); + + println!(); + println!("Hedge-same vs Single: mean {:+.0}%, p95 {:+.0}%, p99 {:+.0}%", + (hs_m - s_m) / s_m * 100.0, + (hs_p95 - s_p95) / s_p95 * 100.0, + (hs_p99 - s_p99) / s_p99 * 100.0); + println!("Hedge-dual vs Single: mean {:+.0}%, p95 {:+.0}%, p99 {:+.0}%", + (hd_m - s_m) / s_m * 100.0, + (hd_p95 - s_p95) / s_p95 * 100.0, + (hd_p99 - s_p99) / s_p99 * 100.0); + println!("Hedge-same vs Hickory: mean {:+.0}%, p95 {:+.0}%, p99 {:+.0}%", + (hs_m - k_m) / k_m * 100.0, + (hs_p95 - k_p95) / k_p95 * 100.0, + (hs_p99 - k_p99) / k_p99 * 100.0); +} + +/// Server-to-server benchmark: Numa vs dnscrypt-proxy vs Unbound. +/// All are full servers: UDP in, encrypted forwarding to Quad9. +/// Numa + dnscrypt: DoH (HTTPS). Unbound: DoT (TLS port 853). +fn run_vs_dnscrypt(rt: &tokio::runtime::Runtime, iterations: usize) { + const DNSCRYPT_ADDR: &str = "127.0.0.1:5455"; + const UNBOUND_ADDR: &str = "127.0.0.1:5456"; + let numa_addr: SocketAddr = NUMA_BENCH.parse().unwrap(); + let dnscrypt_addr: SocketAddr = DNSCRYPT_ADDR.parse().unwrap(); + let unbound_addr: SocketAddr = UNBOUND_ADDR.parse().unwrap(); + + println!("Server-to-Server: Numa vs dnscrypt-proxy vs Unbound"); + println!("Numa (DoH): {}", NUMA_BENCH); + println!("dnscrypt-proxy (DoH): {}", DNSCRYPT_ADDR); + println!("Unbound (DoT): {}", UNBOUND_ADDR); + println!("All forwarding to Quad9 over encrypted transport"); + println!("{} domains × {} rounds × {} iterations\n", + DOMAINS.len(), ROUNDS, iterations); + + // Verify all are up + let servers: Vec<(&str, SocketAddr)> = vec![ + ("Numa", numa_addr), + ("dnscrypt-proxy", dnscrypt_addr), + ("Unbound", unbound_addr), + ]; + for (name, addr) in &servers { + if rt.block_on(query_udp(*addr, "example.com")).is_none() { + eprintln!("{} not responding on {}", name, addr); + std::process::exit(1); + } + } + println!("All servers reachable.\n"); + + // Warm up + println!("Warming up..."); + for _ in 0..5 { + for (_, addr) in &servers { + let _ = rt.block_on(query_udp(*addr, "example.com")); + } + } + + let mut all_numa = Vec::new(); + let mut all_dnscrypt = Vec::new(); + let mut all_unbound = Vec::new(); + let mut iter_stats: Vec<[(f64, f64, f64, f64, f64); 3]> = Vec::new(); + + for iter in 1..=iterations { + println!(" iteration {}/{}...", iter, iterations); + + let mut numa = Vec::new(); + let mut dnscrypt = Vec::new(); + let mut unbound = Vec::new(); + + for domain in DOMAINS { + for round in 0..ROUNDS { + flush_cache(); + std::thread::sleep(Duration::from_millis(5)); + + // Rotate order: 3 servers, 3 possible orderings + let order = round % 3; + let mut measure = |addr: SocketAddr| -> f64 { + let t = Instant::now(); + let _ = rt.block_on(query_udp(addr, domain)); + t.elapsed().as_secs_f64() * 1000.0 + }; + + match order { + 0 => { + numa.push(measure(numa_addr)); + dnscrypt.push(measure(dnscrypt_addr)); + unbound.push(measure(unbound_addr)); + } + 1 => { + dnscrypt.push(measure(dnscrypt_addr)); + unbound.push(measure(unbound_addr)); + numa.push(measure(numa_addr)); + } + _ => { + unbound.push(measure(unbound_addr)); + numa.push(measure(numa_addr)); + dnscrypt.push(measure(dnscrypt_addr)); + } + } + } + } + + let stats = |v: &mut Vec| -> (f64, f64, f64, f64, f64) { + (mean(v), median(v), percentile(v, 95.0), percentile(v, 99.0), stddev(v)) + }; + iter_stats.push([stats(&mut numa), stats(&mut dnscrypt), stats(&mut unbound)]); + + all_numa.extend_from_slice(&numa); + all_dnscrypt.extend_from_slice(&dnscrypt); + all_unbound.extend_from_slice(&unbound); + } + + println!(); + println!("=== Per-iteration medians ==="); + println!("{:<8} {:>10} {:>14} {:>10}", "iter", "Numa", "dnscrypt-proxy", "Unbound"); + for (i, s) in iter_stats.iter().enumerate() { + println!("{:<8} {:>7.1} ms {:>11.1} ms {:>7.1} ms", + i + 1, s[0].1, s[1].1, s[2].1); + } + + println!(); + println!("=== Per-iteration p99 ==="); + println!("{:<8} {:>10} {:>14} {:>10}", "iter", "Numa", "dnscrypt-proxy", "Unbound"); + for (i, s) in iter_stats.iter().enumerate() { + println!("{:<8} {:>7.1} ms {:>11.1} ms {:>7.1} ms", + i + 1, s[0].3, s[1].3, s[2].3); + } + + let stats = |v: &mut Vec| -> (f64, f64, f64, f64, f64) { + (mean(v), median(v), percentile(v, 95.0), percentile(v, 99.0), stddev(v)) + }; + let (n_m, n_med, n_p95, n_p99, n_sd) = stats(&mut all_numa); + let (d_m, d_med, d_p95, d_p99, d_sd) = stats(&mut all_dnscrypt); + let (u_m, u_med, u_p95, u_p99, u_sd) = stats(&mut all_unbound); + + println!(); + let total = iterations * DOMAINS.len() * ROUNDS; + println!("=== Aggregated ({} samples per method) ===", total); + println!(); + println!("{:<10} {:>10} {:>14} {:>10}", "", "Numa", "dnscrypt-proxy", "Unbound"); + println!("{:<10} {:>7.1} ms {:>11.1} ms {:>7.1} ms", "mean", n_m, d_m, u_m); + println!("{:<10} {:>7.1} ms {:>11.1} ms {:>7.1} ms", "median", n_med, d_med, u_med); + println!("{:<10} {:>7.1} ms {:>11.1} ms {:>7.1} ms", "p95", n_p95, d_p95, u_p95); + println!("{:<10} {:>7.1} ms {:>11.1} ms {:>7.1} ms", "p99", n_p99, d_p99, u_p99); + println!("{:<10} {:>7.1} ms {:>11.1} ms {:>7.1} ms", "σ", n_sd, d_sd, u_sd); + println!(); + + println!("Numa vs dnscrypt-proxy:"); + println!(" mean: {:+.0}%, median: {:+.0}%, p99: {:+.0}%", + (n_m - d_m) / d_m * 100.0, (n_med - d_med) / d_med * 100.0, (n_p99 - d_p99) / d_p99 * 100.0); + println!("Numa vs Unbound:"); + println!(" mean: {:+.0}%, median: {:+.0}%, p99: {:+.0}%", + (n_m - u_m) / u_m * 100.0, (n_med - u_med) / u_med * 100.0, (n_p99 - u_p99) / u_p99 * 100.0); +} + +/// Numa vs Unbound: both forward over plain UDP to Quad9, caching enabled. +/// Truly equal transport — no TLS, no HTTP/2, pure forwarding + cache. +fn run_vs_unbound(rt: &tokio::runtime::Runtime, iterations: usize) { + const UNBOUND_ADDR: &str = "127.0.0.1:5456"; + let numa_addr: SocketAddr = NUMA_BENCH.parse().unwrap(); + let unbound_addr: SocketAddr = UNBOUND_ADDR.parse().unwrap(); + + println!("Numa vs Unbound (both plain UDP forwarding to Quad9, caching enabled)"); + println!("Numa: {} → 9.9.9.9:53 UDP", NUMA_BENCH); + println!("Unbound: {} → 9.9.9.9:53 UDP", UNBOUND_ADDR); + println!("{} domains × {} rounds × {} iterations\n", + DOMAINS.len(), ROUNDS, iterations); + + if rt.block_on(query_udp(numa_addr, "example.com")).is_none() { + eprintln!("Numa not responding"); std::process::exit(1); + } + if rt.block_on(query_udp(unbound_addr, "example.com")).is_none() { + eprintln!("Unbound not responding"); std::process::exit(1); + } + println!("Both servers reachable.\n"); + + println!("Warming up..."); + for _ in 0..5 { + let _ = rt.block_on(query_udp(numa_addr, "example.com")); + let _ = rt.block_on(query_udp(unbound_addr, "example.com")); + } + + let mut all_numa = Vec::new(); + let mut all_unbound = Vec::new(); + let mut iter_stats: Vec<[(f64, f64, f64, f64, f64); 2]> = Vec::new(); + + for iter in 1..=iterations { + println!(" iteration {}/{}...", iter, iterations); + + let mut numa = Vec::new(); + let mut unbound = Vec::new(); + + for domain in DOMAINS { + for round in 0..ROUNDS { + // No cache flushing — both serve from cache after first hit + let mut measure = |addr: SocketAddr| -> f64 { + let t = Instant::now(); + let _ = rt.block_on(query_udp(addr, domain)); + t.elapsed().as_secs_f64() * 1000.0 + }; + + if round % 2 == 0 { + numa.push(measure(numa_addr)); + unbound.push(measure(unbound_addr)); + } else { + unbound.push(measure(unbound_addr)); + numa.push(measure(numa_addr)); + } + } + } + + let stats = |v: &mut Vec| -> (f64, f64, f64, f64, f64) { + (mean(v), median(v), percentile(v, 95.0), percentile(v, 99.0), stddev(v)) + }; + iter_stats.push([stats(&mut numa), stats(&mut unbound)]); + + all_numa.extend_from_slice(&numa); + all_unbound.extend_from_slice(&unbound); + } + + println!(); + println!("=== Per-iteration medians ==="); + println!("{:<8} {:>10} {:>10}", "iter", "Numa", "Unbound"); + for (i, s) in iter_stats.iter().enumerate() { + println!("{:<8} {:>7.1} ms {:>7.1} ms", i + 1, s[0].1, s[1].1); + } + + println!(); + println!("=== Per-iteration p99 ==="); + println!("{:<8} {:>10} {:>10}", "iter", "Numa", "Unbound"); + for (i, s) in iter_stats.iter().enumerate() { + println!("{:<8} {:>7.1} ms {:>7.1} ms", i + 1, s[0].3, s[1].3); + } + + let stats = |v: &mut Vec| -> (f64, f64, f64, f64, f64) { + (mean(v), median(v), percentile(v, 95.0), percentile(v, 99.0), stddev(v)) + }; + let (n_m, n_med, n_p95, n_p99, n_sd) = stats(&mut all_numa); + let (u_m, u_med, u_p95, u_p99, u_sd) = stats(&mut all_unbound); + + println!(); + let total = iterations * DOMAINS.len() * ROUNDS; + println!("=== Aggregated ({} samples per method) ===", total); + println!(); + println!("{:<10} {:>10} {:>10}", "", "Numa", "Unbound"); + println!("{:<10} {:>7.1} ms {:>7.1} ms", "mean", n_m, u_m); + println!("{:<10} {:>7.1} ms {:>7.1} ms", "median", n_med, u_med); + println!("{:<10} {:>7.1} ms {:>7.1} ms", "p95", n_p95, u_p95); + println!("{:<10} {:>7.1} ms {:>7.1} ms", "p99", n_p99, u_p99); + println!("{:<10} {:>7.1} ms {:>7.1} ms", "σ", n_sd, u_sd); + println!(); + + println!("Numa vs Unbound:"); + println!(" mean: {:+.1} ms ({:+.0}%)", n_m - u_m, (n_m - u_m) / u_m * 100.0); + println!(" median: {:+.1} ms ({:+.0}%)", n_med - u_med, (n_med - u_med) / u_med * 100.0); + println!(" p95: {:+.1} ms ({:+.0}%)", n_p95 - u_p95, (n_p95 - u_p95) / u_p95 * 100.0); + println!(" p99: {:+.1} ms ({:+.0}%)", n_p99 - u_p99, (n_p99 - u_p99) / u_p99 * 100.0); +} + +/// Build a DNS query as a Vec for use with forward_query_raw. +fn build_query_vec(domain: &str) -> Vec { + let mut buf = vec![0u8; 512]; + let len = build_query(&mut buf, domain); + buf.truncate(len); + buf +} + +fn measure R, R>(_rt: &tokio::runtime::Runtime, f: F) -> f64 { + let start = Instant::now(); + f(); + start.elapsed().as_secs_f64() * 1000.0 +} + +fn mean(v: &[f64]) -> f64 { + v.iter().sum::() / v.len() as f64 +} + +fn stddev(v: &[f64]) -> f64 { + let m = mean(v); + let var = v.iter().map(|x| (x - m).powi(2)).sum::() / v.len() as f64; + var.sqrt() +} + +fn median(v: &mut [f64]) -> f64 { + v.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let n = v.len(); + if n % 2 == 0 { + (v[n / 2 - 1] + v[n / 2]) / 2.0 + } else { + v[n / 2] + } +} + +fn percentile(sorted: &[f64], p: f64) -> f64 { + let idx = (p / 100.0 * (sorted.len() - 1) as f64).round() as usize; + sorted[idx.min(sorted.len() - 1)] +} + +fn format_delta(delta: f64) -> String { + if delta > 0.0 { + format!("+{:.1}", delta) + } else { + format!("{:.1}", delta) + } +} + +/// Query a DNS server over UDP. +async fn query_udp(addr: SocketAddr, domain: &str) -> Option<()> { + use tokio::net::UdpSocket; + + let sock = UdpSocket::bind("0.0.0.0:0").await.ok()?; + let mut buf = vec![0u8; 512]; + let len = build_query(&mut buf, domain); + + sock.send_to(&buf[..len], addr).await.ok()?; + + let mut resp = vec![0u8; 4096]; + tokio::time::timeout(Duration::from_secs(10), sock.recv_from(&mut resp)) + .await + .ok()? + .ok()?; + + Some(()) +} + +/// Build a shared Hickory DoH resolver (reuses TLS connection across queries). +async fn build_hickory_resolver() -> hickory_resolver::TokioResolver { + use hickory_resolver::config::*; + + let ns = NameServerConfig { + socket_addr: "9.9.9.9:443".parse().unwrap(), + protocol: hickory_proto::xfer::Protocol::Https, + tls_dns_name: Some("dns.quad9.net".to_string()), + trust_negative_responses: true, + bind_addr: None, + http_endpoint: Some("/dns-query".to_string()), + }; + + let config = ResolverConfig::from_parts(None, vec![], NameServerConfigGroup::from(vec![ns])); + + let mut opts = ResolverOpts::default(); + opts.cache_size = 0; + opts.num_concurrent_reqs = 1; + opts.timeout = Duration::from_secs(10); + + hickory_resolver::TokioResolver::builder_with_config(config, Default::default()) + .with_options(opts) + .build() +} + +/// Query using the shared Hickory resolver. +async fn query_hickory_doh( + resolver: &hickory_resolver::TokioResolver, + domain: &str, +) -> Option<()> { + use hickory_resolver::proto::rr::RecordType; + let _ = resolver.lookup(domain, RecordType::A).await.ok()?; + Some(()) +} + +fn build_query(buf: &mut [u8], domain: &str) -> usize { + let mut pos = 0; + buf[pos..pos + 2].copy_from_slice(&0x1234u16.to_be_bytes()); + pos += 2; + buf[pos..pos + 2].copy_from_slice(&0x0100u16.to_be_bytes()); + pos += 2; + buf[pos..pos + 2].copy_from_slice(&1u16.to_be_bytes()); + pos += 2; + buf[pos..pos + 6].fill(0); + pos += 6; + + for label in domain.split('.') { + buf[pos] = label.len() as u8; + pos += 1; + buf[pos..pos + label.len()].copy_from_slice(label.as_bytes()); + pos += label.len(); + } + buf[pos] = 0; + pos += 1; + buf[pos..pos + 2].copy_from_slice(&1u16.to_be_bytes()); + pos += 2; + buf[pos..pos + 2].copy_from_slice(&1u16.to_be_bytes()); + pos += 2; + pos +} + +fn flush_cache() { + let _ = std::process::Command::new("curl") + .args(["-s", "-X", "DELETE", &format!("http://127.0.0.1:{NUMA_API}/cache")]) + .output(); +} diff --git a/scripts/bench-recursive.sh b/scripts/bench-recursive.sh new file mode 100755 index 0000000..1a1ab71 --- /dev/null +++ b/scripts/bench-recursive.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash +# Bench: Numa cold-cache recursive resolution vs dig (forwarded through system resolver) +# +# Measures cold-cache recursive resolution time for Numa. +# Flushes Numa's cache before each query to ensure cold-cache. +# Compares against dig querying a public recursive resolver (no cache advantage). +# +# Usage: ./scripts/bench-recursive.sh [numa_port] + +set -euo pipefail + +NUMA_ADDR="${NUMA_ADDR:-127.0.0.1}" +NUMA_PORT="${NUMA_PORT:-${1:-53}}" +API_PORT="${API_PORT:-5380}" +ROUNDS=3 + +DOMAINS=( + "example.com" + "rust-lang.org" + "kernel.org" + "signal.org" + "archlinux.org" + "openbsd.org" + "git-scm.com" + "sqlite.org" + "wireguard.com" + "mozilla.org" +) + +GREEN='\033[0;32m' +AMBER='\033[0;33m' +CYAN='\033[0;36m' +DIM='\033[0;90m' +BOLD='\033[1m' +RESET='\033[0m' + +echo -e "${CYAN}${BOLD}Recursive DNS Resolution Benchmark${RESET}" +echo -e "${DIM}Numa (cold cache, recursive from root) vs dig @1.1.1.1 (public resolver)${RESET}" +echo -e "${DIM}Rounds per domain: ${ROUNDS}${RESET}" +echo "" + +# Verify Numa is reachable +if ! dig @${NUMA_ADDR} -p ${NUMA_PORT} +short +time=3 +tries=1 example.com A &>/dev/null; then + echo -e "${AMBER}Numa not responding on ${NUMA_ADDR}:${NUMA_PORT}${RESET}" >&2 + exit 1 +fi + +# Verify we can flush cache +if ! curl -s -X DELETE "http://${NUMA_ADDR}:${API_PORT}/cache" &>/dev/null; then + echo -e "${AMBER}Cannot flush cache via API at ${NUMA_ADDR}:${API_PORT}${RESET}" >&2 + exit 1 +fi + +measure_ms() { + local start end + start=$(python3 -c 'import time; print(time.time())') + eval "$1" &>/dev/null + end=$(python3 -c 'import time; print(time.time())') + python3 -c "print(round(($end - $start) * 1000, 1))" +} + +printf "${BOLD}%-22s %10s %10s %8s${RESET}\n" "Domain" "Numa (ms)" "1.1.1.1" "Delta" +printf "%-22s %10s %10s %8s\n" "----------------------" "----------" "----------" "--------" + +numa_total=0 +dig_total=0 +count=0 + +for domain in "${DOMAINS[@]}"; do + numa_sum=0 + dig_sum=0 + + for ((r=1; r<=ROUNDS; r++)); do + # Flush Numa cache + curl -s -X DELETE "http://${NUMA_ADDR}:${API_PORT}/cache" &>/dev/null + sleep 0.05 + + # Measure Numa (recursive from root, cold cache) + ms=$(measure_ms "dig @${NUMA_ADDR} -p ${NUMA_PORT} +short +time=10 +tries=1 ${domain} A") + numa_sum=$(python3 -c "print(round($numa_sum + $ms, 1))") + + # Measure dig against 1.1.1.1 (Cloudflare — warm cache, but shows baseline) + ms=$(measure_ms "dig @1.1.1.1 +short +time=10 +tries=1 ${domain} A") + dig_sum=$(python3 -c "print(round($dig_sum + $ms, 1))") + done + + numa_avg=$(python3 -c "print(round($numa_sum / $ROUNDS, 1))") + dig_avg=$(python3 -c "print(round($dig_sum / $ROUNDS, 1))") + delta=$(python3 -c "d = round($numa_avg - $dig_avg, 1); print(f'+{d}' if d > 0 else str(d))") + + # Color the delta + delta_color="$GREEN" + if python3 -c "exit(0 if $numa_avg > $dig_avg * 1.5 else 1)" 2>/dev/null; then + delta_color="$AMBER" + fi + + printf "%-22s %8s ms %8s ms ${delta_color}%6s ms${RESET}\n" "$domain" "$numa_avg" "$dig_avg" "$delta" + + numa_total=$(python3 -c "print(round($numa_total + $numa_avg, 1))") + dig_total=$(python3 -c "print(round($dig_total + $dig_avg, 1))") + count=$((count + 1)) +done + +echo "" +numa_mean=$(python3 -c "print(round($numa_total / $count, 1))") +dig_mean=$(python3 -c "print(round($dig_total / $count, 1))") +delta_mean=$(python3 -c "d = round($numa_mean - $dig_mean, 1); print(f'+{d}' if d > 0 else str(d))") + +printf "${BOLD}%-22s %8s ms %8s ms %6s ms${RESET}\n" "AVERAGE" "$numa_mean" "$dig_mean" "$delta_mean" + +echo "" +echo -e "${DIM}Note: Numa resolves recursively from root hints (cold cache).${RESET}" +echo -e "${DIM}1.1.1.1 serves from Cloudflare's global cache (warm). The comparison${RESET}" +echo -e "${DIM}is intentionally unfair — it shows Numa's worst case vs the best case${RESET}" +echo -e "${DIM}of a global anycast resolver. Cached Numa queries resolve in <1ms.${RESET}" diff --git a/src/api.rs b/src/api.rs index a0bae58..e638fba 100644 --- a/src/api.rs +++ b/src/api.rs @@ -1029,6 +1029,7 @@ mod tests { upstream_port: 53, lan_ip: Mutex::new(std::net::Ipv4Addr::LOCALHOST), timeout: std::time::Duration::from_secs(3), + hedge_delay: std::time::Duration::ZERO, proxy_tld: "numa".to_string(), proxy_tld_suffix: ".numa".to_string(), lan_enabled: false, diff --git a/src/cache.rs b/src/cache.rs index 5bdde85..82795bc 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -1,9 +1,10 @@ use std::collections::HashMap; use std::time::{Duration, Instant}; +use crate::buffer::BytePacketBuffer; use crate::packet::DnsPacket; use crate::question::QueryType; -use crate::record::DnsRecord; +use crate::wire::WireMeta; #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] pub enum DnssecStatus { @@ -26,14 +27,16 @@ impl DnssecStatus { } struct CacheEntry { - packet: DnsPacket, + wire: Vec, + meta: WireMeta, inserted_at: Instant, ttl: Duration, dnssec_status: DnssecStatus, } -/// DNS cache using a two-level map (domain -> query_type -> entry) so that -/// lookups can borrow `&str` instead of allocating a `String` key. +const STALE_WINDOW: Duration = Duration::from_secs(3600); + +/// DNS cache with serve-stale (RFC 8767). Stores raw wire bytes. pub struct DnsCache { entries: HashMap>, entry_count: usize, @@ -53,6 +56,80 @@ impl DnsCache { } } + /// Look up cached wire bytes, patching ID and TTLs in the returned copy. + /// Implements serve-stale (RFC 8767): expired entries within STALE_WINDOW + /// are returned with TTL=1 and `stale=true` so callers can revalidate. + pub fn lookup_wire( + &self, + domain: &str, + qtype: QueryType, + new_id: u16, + ) -> Option<(Vec, DnssecStatus, bool)> { + let type_map = self.entries.get(domain)?; + let entry = type_map.get(&qtype)?; + + let elapsed = entry.inserted_at.elapsed(); + let (remaining, stale) = if elapsed < entry.ttl { + let secs = (entry.ttl - elapsed).as_secs() as u32; + (secs.max(1), false) + } else if elapsed < entry.ttl + STALE_WINDOW { + (1, true) + } else { + return None; + }; + + let mut wire = entry.wire.clone(); + crate::wire::patch_id(&mut wire, new_id); + crate::wire::patch_ttls(&mut wire, &entry.meta.ttl_offsets, remaining); + + Some((wire, entry.dnssec_status, stale)) + } + + pub fn insert_wire( + &mut self, + domain: &str, + qtype: QueryType, + wire: &[u8], + dnssec_status: DnssecStatus, + ) { + let meta = match crate::wire::scan_ttl_offsets(wire) { + Ok(m) => m, + Err(_) => return, // malformed wire, skip + }; + + if self.entry_count >= self.max_entries { + self.evict_expired(); + if self.entry_count >= self.max_entries { + return; + } + } + + let min_ttl = crate::wire::min_ttl_from_wire(wire, &meta) + .unwrap_or(self.min_ttl) + .clamp(self.min_ttl, self.max_ttl); + + let type_map = if let Some(existing) = self.entries.get_mut(domain) { + existing + } else { + self.entries.entry(domain.to_string()).or_default() + }; + + if !type_map.contains_key(&qtype) { + self.entry_count += 1; + } + + type_map.insert( + qtype, + CacheEntry { + wire: wire.to_vec(), + meta, + inserted_at: Instant::now(), + ttl: Duration::from_secs(min_ttl as u64), + dnssec_status, + }, + ); + } + /// Read-only lookup — expired entries are left in place (cleaned up on insert). pub fn lookup(&self, domain: &str, qtype: QueryType) -> Option { self.lookup_with_status(domain, qtype).map(|(pkt, _)| pkt) @@ -63,23 +140,28 @@ impl DnsCache { domain: &str, qtype: QueryType, ) -> Option<(DnsPacket, DnssecStatus)> { - let type_map = self.entries.get(domain)?; - let entry = type_map.get(&qtype)?; + let (wire, status, _stale) = self.lookup_wire(domain, qtype, 0)?; + let mut buf = BytePacketBuffer::from_bytes(&wire); + let pkt = DnsPacket::from_buffer(&mut buf).ok()?; + Some((pkt, status)) + } - let elapsed = entry.inserted_at.elapsed(); - if elapsed >= entry.ttl { - return None; + pub fn insert(&mut self, domain: &str, qtype: QueryType, packet: &DnsPacket) { + self.insert_with_status(domain, qtype, packet, DnssecStatus::Indeterminate); + } + + pub fn insert_with_status( + &mut self, + domain: &str, + qtype: QueryType, + packet: &DnsPacket, + dnssec_status: DnssecStatus, + ) { + let mut buf = BytePacketBuffer::new(); + if packet.write(&mut buf).is_err() { + return; } - - let remaining_secs = (entry.ttl - elapsed).as_secs() as u32; - let remaining = remaining_secs.max(1); - - let mut packet = entry.packet.clone(); - adjust_ttls(&mut packet.answers, remaining); - adjust_ttls(&mut packet.authorities, remaining); - adjust_ttls(&mut packet.resources, remaining); - - Some((packet, entry.dnssec_status)) + self.insert_wire(domain, qtype, buf.filled(), dnssec_status); } pub fn ttl_remaining(&self, domain: &str, qtype: QueryType) -> Option<(u32, u32)> { @@ -105,49 +187,6 @@ impl DnsCache { false } - pub fn insert(&mut self, domain: &str, qtype: QueryType, packet: &DnsPacket) { - self.insert_with_status(domain, qtype, packet, DnssecStatus::Indeterminate); - } - - pub fn insert_with_status( - &mut self, - domain: &str, - qtype: QueryType, - packet: &DnsPacket, - dnssec_status: DnssecStatus, - ) { - if self.entry_count >= self.max_entries { - self.evict_expired(); - if self.entry_count >= self.max_entries { - return; - } - } - - let min_ttl = extract_min_ttl(&packet.answers) - .unwrap_or(self.min_ttl) - .clamp(self.min_ttl, self.max_ttl); - - let type_map = if let Some(existing) = self.entries.get_mut(domain) { - existing - } else { - self.entries.entry(domain.to_string()).or_default() - }; - - if !type_map.contains_key(&qtype) { - self.entry_count += 1; - } - - type_map.insert( - qtype, - CacheEntry { - packet: packet.clone(), - inserted_at: Instant::now(), - ttl: Duration::from_secs(min_ttl as u64), - dnssec_status, - }, - ); - } - pub fn len(&self) -> usize { self.entry_count } @@ -179,7 +218,8 @@ impl DnsCache { + 1; total += type_map.capacity() * inner_slot; for entry in type_map.values() { - total += entry.packet.heap_bytes(); + total += entry.wire.capacity() + + entry.meta.ttl_offsets.capacity() * std::mem::size_of::(); } } total @@ -228,20 +268,11 @@ pub struct CacheInfo { pub ttl_remaining: u32, } -fn extract_min_ttl(records: &[DnsRecord]) -> Option { - records.iter().map(|r| r.ttl()).min() -} - -fn adjust_ttls(records: &mut [DnsRecord], new_ttl: u32) { - for record in records.iter_mut() { - record.set_ttl(new_ttl); - } -} - #[cfg(test)] mod tests { use super::*; use crate::packet::DnsPacket; + use crate::record::DnsRecord; #[test] fn heap_bytes_grows_with_entries() { diff --git a/src/config.rs b/src/config.rs index ae9f685..5f9db73 100644 --- a/src/config.rs +++ b/src/config.rs @@ -138,6 +138,8 @@ pub struct UpstreamConfig { pub fallback: Vec, #[serde(default = "default_timeout_ms")] pub timeout_ms: u64, + #[serde(default = "default_hedge_ms")] + pub hedge_ms: u64, #[serde(default = "default_root_hints")] pub root_hints: Vec, #[serde(default = "default_prime_tlds")] @@ -154,6 +156,7 @@ impl Default for UpstreamConfig { port: default_upstream_port(), fallback: Vec::new(), timeout_ms: default_timeout_ms(), + hedge_ms: default_hedge_ms(), root_hints: default_root_hints(), prime_tlds: default_prime_tlds(), srtt: default_srtt(), @@ -271,6 +274,9 @@ fn default_upstream_port() -> u16 { fn default_timeout_ms() -> u64 { 5000 } +fn default_hedge_ms() -> u64 { + 10 +} #[derive(Deserialize)] pub struct CacheConfig { diff --git a/src/ctx.rs b/src/ctx.rs index 3ef6a0a..2b26a06 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -16,7 +16,9 @@ use crate::blocklist::BlocklistStore; use crate::buffer::BytePacketBuffer; use crate::cache::{DnsCache, DnssecStatus}; use crate::config::{UpstreamMode, ZoneMap}; -use crate::forward::{forward_query, forward_with_failover, Upstream, UpstreamPool}; +use crate::forward::{ + forward_query_raw, forward_with_failover_raw, Upstream, UpstreamPool, +}; use crate::header::ResultCode; use crate::health::HealthMeta; use crate::lan::PeerStore; @@ -47,6 +49,7 @@ pub struct ServerCtx { pub upstream_port: u16, pub lan_ip: Mutex, pub timeout: Duration, + pub hedge_delay: Duration, pub proxy_tld: String, pub proxy_tld_suffix: String, // pre-computed ".{tld}" to avoid per-query allocation pub lan_enabled: bool, @@ -81,6 +84,7 @@ pub struct ServerCtx { /// (and logging parse errors) before calling this function. pub async fn resolve_query( query: DnsPacket, + raw_wire: &[u8], src_addr: SocketAddr, ctx: &ServerCtx, ) -> crate::Result { @@ -177,9 +181,8 @@ pub async fn resolve_query( // Conditional forwarding takes priority over recursive mode // (e.g. Tailscale .ts.net, VPC private zones) let upstream = Upstream::Udp(fwd_addr); - match forward_query(&query, &upstream, ctx.timeout).await { + match forward_and_cache(raw_wire, &upstream, ctx, &qname, qtype).await { Ok(resp) => { - ctx.cache.write().unwrap().insert(&qname, qtype, &resp); (resp, QueryPath::Forwarded, DnssecStatus::Indeterminate) } Err(e) => { @@ -221,10 +224,19 @@ pub async fn resolve_query( (resp, path, DnssecStatus::Indeterminate) } else { let pool = ctx.upstream_pool.lock().unwrap().clone(); - match forward_with_failover(&query, &pool, &ctx.srtt, ctx.timeout).await { - Ok(resp) => { - ctx.cache.write().unwrap().insert(&qname, qtype, &resp); - (resp, QueryPath::Forwarded, DnssecStatus::Indeterminate) + match forward_with_failover_raw(raw_wire, &pool, &ctx.srtt, ctx.timeout, ctx.hedge_delay).await { + Ok(resp_wire) => { + ctx.cache.write().unwrap().insert_wire( + &qname, qtype, &resp_wire, DnssecStatus::Indeterminate, + ); + let mut buf = BytePacketBuffer::from_bytes(&resp_wire); + match DnsPacket::from_buffer(&mut buf) { + Ok(resp) => (resp, QueryPath::Forwarded, DnssecStatus::Indeterminate), + Err(e) => { + error!("{} | {:?} {} | PARSE ERROR | {}", src_addr, qtype, qname, e); + (DnsPacket::response_from(&query, ResultCode::SERVFAIL), QueryPath::UpstreamError, DnssecStatus::Indeterminate) + } + } } Err(e) => { error!( @@ -347,12 +359,29 @@ pub async fn resolve_query( Ok(resp_buffer) } -/// Handle a DNS query received over UDP. Thin wrapper around resolve_query. +async fn forward_and_cache( + wire: &[u8], + upstream: &Upstream, + ctx: &ServerCtx, + qname: &str, + qtype: QueryType, +) -> crate::Result { + let resp_wire = forward_query_raw(wire, upstream, ctx.timeout).await?; + ctx.cache + .write() + .unwrap() + .insert_wire(qname, qtype, &resp_wire, DnssecStatus::Indeterminate); + let mut buf = BytePacketBuffer::from_bytes(&resp_wire); + DnsPacket::from_buffer(&mut buf) +} + pub async fn handle_query( mut buffer: BytePacketBuffer, + raw_len: usize, src_addr: SocketAddr, ctx: &ServerCtx, ) -> crate::Result<()> { + let raw_wire = buffer.buf[..raw_len].to_vec(); let query = match DnsPacket::from_buffer(&mut buffer) { Ok(packet) => packet, Err(e) => { @@ -360,7 +389,7 @@ pub async fn handle_query( return Ok(()); } }; - match resolve_query(query, src_addr, ctx).await { + match resolve_query(query, &raw_wire, src_addr, ctx).await { Ok(resp_buffer) => { ctx.socket.send_to(resp_buffer.filled(), src_addr).await?; } diff --git a/src/doh.rs b/src/doh.rs index cf50b31..e31b6fe 100644 --- a/src/doh.rs +++ b/src/doh.rs @@ -82,7 +82,7 @@ async fn resolve_doh(dns_bytes: &[u8], src: SocketAddr, ctx: &ServerCtx) -> Resp let query_rd = query.header.recursion_desired; let questions = query.questions.clone(); - match resolve_query(query, src, ctx).await { + match resolve_query(query, dns_bytes, src, ctx).await { Ok(resp_buffer) => { let min_ttl = extract_min_ttl(resp_buffer.filled()); dns_response(resp_buffer.filled(), min_ttl) @@ -102,11 +102,10 @@ async fn resolve_doh(dns_bytes: &[u8], src: SocketAddr, ctx: &ServerCtx) -> Resp } fn extract_min_ttl(wire: &[u8]) -> u32 { - let mut buf = BytePacketBuffer::from_bytes(wire); - match DnsPacket::from_buffer(&mut buf) { - Ok(pkt) => pkt.answers.iter().map(|r| r.ttl()).min().unwrap_or(0), - Err(_) => 0, - } + crate::wire::scan_ttl_offsets(wire) + .ok() + .and_then(|meta| crate::wire::min_ttl_from_wire(wire, &meta)) + .unwrap_or(0) } fn dns_response(wire: &[u8], min_ttl: u32) -> Response { diff --git a/src/dot.rs b/src/dot.rs index 0d48fa2..4513f60 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -177,8 +177,7 @@ where break; }; - // Parse query up-front so we can echo its question section in SERVFAIL - // responses when resolve_query fails. + let raw_wire = buffer.buf[..msg_len].to_vec(); let query = match DnsPacket::from_buffer(&mut buffer) { Ok(q) => q, Err(e) => { @@ -200,7 +199,7 @@ where } }; - match resolve_query(query.clone(), remote_addr, ctx).await { + match resolve_query(query.clone(), &raw_wire, remote_addr, ctx).await { Ok(resp_buffer) => { if write_framed(&mut stream, resp_buffer.filled()) .await @@ -370,6 +369,7 @@ mod tests { upstream_port: 53, lan_ip: Mutex::new(std::net::Ipv4Addr::LOCALHOST), timeout: Duration::from_millis(200), + hedge_delay: Duration::ZERO, proxy_tld: "numa".to_string(), proxy_tld_suffix: ".numa".to_string(), lan_enabled: false, diff --git a/src/forward.rs b/src/forward.rs index ea2f1e2..401ae1c 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -65,6 +65,13 @@ pub fn parse_upstream(s: &str, default_port: u16) -> Result { if s.starts_with("https://") { let client = reqwest::Client::builder() .use_rustls_tls() + .http2_initial_stream_window_size(65_535) + .http2_initial_connection_window_size(65_535) + .http2_keep_alive_interval(Duration::from_secs(15)) + .http2_keep_alive_while_idle(true) + .http2_keep_alive_timeout(Duration::from_secs(10)) + .pool_idle_timeout(Duration::from_secs(300)) + .pool_max_idle_per_host(1) .build() .unwrap_or_default(); return Ok(Upstream::Doh { @@ -325,13 +332,170 @@ async fn forward_doh( let mut send_buffer = BytePacketBuffer::new(); query.write(&mut send_buffer)?; + let resp_bytes = forward_doh_raw(send_buffer.filled(), url, client, timeout_duration).await?; + let mut recv_buffer = BytePacketBuffer::from_bytes(&resp_bytes); + DnsPacket::from_buffer(&mut recv_buffer) +} + +pub async fn forward_query_raw( + wire: &[u8], + upstream: &Upstream, + timeout_duration: Duration, +) -> Result> { + match upstream { + Upstream::Udp(addr) => forward_udp_raw(wire, *addr, timeout_duration).await, + Upstream::Doh { url, client } => forward_doh_raw(wire, url, client, timeout_duration).await, + } +} + +pub async fn forward_with_hedging_raw( + wire: &[u8], + primary: &Upstream, + secondary: &Upstream, + hedge_delay: Duration, + timeout_duration: Duration, +) -> Result> { + use tokio::time::sleep; + + let primary_fut = forward_query_raw(wire, primary, timeout_duration); + tokio::pin!(primary_fut); + + let delay = sleep(hedge_delay); + tokio::pin!(delay); + + // Phase 1: wait for either primary to return, or the hedge delay. + tokio::select! { + result = &mut primary_fut => return result, + _ = &mut delay => {} + } + + // Phase 2: hedge delay expired — fire secondary while still polling primary. + let secondary_fut = forward_query_raw(wire, secondary, timeout_duration); + tokio::pin!(secondary_fut); + + // First successful response wins. If one errors, wait for the other. + let mut primary_err: Option = None; + let mut secondary_err: Option = None; + + loop { + tokio::select! { + r = &mut primary_fut, if primary_err.is_none() => { + match r { + Ok(resp) => return Ok(resp), + Err(e) => { + if let Some(se) = secondary_err.take() { + return Err(se); + } + primary_err = Some(e); + } + } + } + r = &mut secondary_fut, if secondary_err.is_none() => { + match r { + Ok(resp) => return Ok(resp), + Err(e) => { + if let Some(pe) = primary_err.take() { + return Err(pe); + } + secondary_err = Some(e); + } + } + } + } + + match (primary_err, secondary_err) { + (Some(pe), Some(_)) => return Err(pe), + (pe, se) => { primary_err = pe; secondary_err = se; } + } + } +} + +pub async fn forward_with_failover_raw( + wire: &[u8], + pool: &UpstreamPool, + srtt: &RwLock, + timeout_duration: Duration, + hedge_delay: Duration, +) -> Result> { + let mut candidates: Vec<(usize, u64)> = pool + .primary + .iter() + .enumerate() + .map(|(i, u)| { + let rtt = match u { + Upstream::Udp(addr) => srtt.read().unwrap().get(addr.ip()), + _ => 0, + }; + (i, rtt) + }) + .collect(); + candidates.sort_by_key(|&(_, rtt)| rtt); + + let all_upstreams: Vec<&Upstream> = candidates + .iter() + .map(|&(i, _)| &pool.primary[i]) + .chain(pool.fallback.iter()) + .collect(); + + let mut last_err: Option> = None; + + for upstream in &all_upstreams { + let start = Instant::now(); + let result = if !hedge_delay.is_zero() && matches!(upstream, Upstream::Doh { .. }) { + // Hedge against the same upstream: parallel h2 streams on same + // connection. Independent stream scheduling rescues dispatch spikes. + forward_with_hedging_raw(wire, upstream, upstream, hedge_delay, timeout_duration).await + } else { + forward_query_raw(wire, upstream, timeout_duration).await + }; + match result { + Ok(resp) => { + if let Upstream::Udp(addr) = upstream { + let rtt_ms = start.elapsed().as_millis() as u64; + srtt.write().unwrap().record_rtt(addr.ip(), rtt_ms, false); + } + return Ok(resp); + } + Err(e) => { + if let Upstream::Udp(addr) = upstream { + srtt.write().unwrap().record_failure(addr.ip()); + } + log::debug!("upstream {} failed: {}", upstream, e); + last_err = Some(e); + } + } + } + + Err(last_err.unwrap_or_else(|| "no upstream configured".into())) +} + +async fn forward_udp_raw( + wire: &[u8], + upstream: SocketAddr, + timeout_duration: Duration, +) -> Result> { + let socket = UdpSocket::bind("0.0.0.0:0").await?; + socket.send_to(wire, upstream).await?; + + let mut recv_buf = vec![0u8; 4096]; + let (size, _) = timeout(timeout_duration, socket.recv_from(&mut recv_buf)).await??; + recv_buf.truncate(size); + Ok(recv_buf) +} + +async fn forward_doh_raw( + wire: &[u8], + url: &str, + client: &reqwest::Client, + timeout_duration: Duration, +) -> Result> { let resp = timeout( timeout_duration, client .post(url) .header("content-type", "application/dns-message") .header("accept", "application/dns-message") - .body(send_buffer.filled().to_vec()) + .body(wire.to_vec()) .send(), ) .await?? @@ -339,9 +503,25 @@ async fn forward_doh( let bytes = resp.bytes().await?; log::debug!("DoH response: {} bytes", bytes.len()); + Ok(bytes.to_vec()) +} - let mut recv_buffer = BytePacketBuffer::from_bytes(&bytes); - DnsPacket::from_buffer(&mut recv_buffer) +/// Send a lightweight keepalive query to a DoH upstream to prevent +/// the HTTP/2 + TLS connection from going idle and being torn down. +pub async fn keepalive_doh(upstream: &Upstream) { + if let Upstream::Doh { url, client } = upstream { + // Query for . NS — minimal, always succeeds, response is small + let wire: &[u8] = &[ + 0x00, 0x00, // ID + 0x01, 0x00, // flags: RD=1 + 0x00, 0x01, // QDCOUNT=1 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // AN=0, NS=0, AR=0 + 0x00, // root name (.) + 0x00, 0x02, // type NS + 0x00, 0x01, // class IN + ]; + let _ = forward_doh_raw(wire, url, client, Duration::from_secs(5)).await; + } } #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index 4074020..92a0b00 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,6 +26,7 @@ pub mod srtt; pub mod stats; pub mod system_dns; pub mod tls; +pub mod wire; pub type Error = Box; pub type Result = std::result::Result; diff --git a/src/main.rs b/src/main.rs index 7592186..0211a59 100644 --- a/src/main.rs +++ b/src/main.rs @@ -297,6 +297,7 @@ async fn main() -> numa::Result<()> { upstream_port: config.upstream.port, lan_ip: Mutex::new(numa::lan::detect_lan_ip().unwrap_or(std::net::Ipv4Addr::LOCALHOST)), timeout: Duration::from_millis(config.upstream.timeout_ms), + hedge_delay: Duration::from_millis(config.upstream.hedge_ms), proxy_tld_suffix: if config.proxy.tld.is_empty() { String::new() } else { @@ -511,6 +512,14 @@ async fn main() -> numa::Result<()> { }); } + // Spawn DoH connection keepalive — prevents idle TLS teardown + { + let keepalive_ctx = Arc::clone(&ctx); + tokio::spawn(async move { + doh_keepalive_loop(keepalive_ctx).await; + }); + } + // Spawn HTTP API server let api_ctx = Arc::clone(&ctx); let api_addr: SocketAddr = format!("{}:{}", config.server.api_bind_addr, api_port).parse()?; @@ -590,7 +599,7 @@ async fn main() -> numa::Result<()> { #[allow(clippy::infinite_loop)] loop { let mut buffer = BytePacketBuffer::new(); - let (_, src_addr) = match ctx.socket.recv_from(&mut buffer.buf).await { + let (len, src_addr) = match ctx.socket.recv_from(&mut buffer.buf).await { Ok(r) => r, Err(e) if e.kind() == std::io::ErrorKind::ConnectionReset => { // Windows delivers ICMP port-unreachable as ConnectionReset on UDP sockets @@ -598,10 +607,11 @@ async fn main() -> numa::Result<()> { } Err(e) => return Err(e.into()), }; + let raw_len = len; let ctx = Arc::clone(&ctx); tokio::spawn(async move { - if let Err(e) = handle_query(buffer, src_addr, &ctx).await { + if let Err(e) = handle_query(buffer, raw_len, src_addr, &ctx).await { error!("{} | HANDLER ERROR | {}", src_addr, e); } }); @@ -777,6 +787,18 @@ async fn warm_domain(ctx: &ServerCtx, domain: &str) { } } +async fn doh_keepalive_loop(ctx: Arc) { + let mut interval = tokio::time::interval(Duration::from_secs(25)); + interval.tick().await; // skip first immediate tick + loop { + interval.tick().await; + let pool = ctx.upstream_pool.lock().unwrap().clone(); + if let Some(upstream) = pool.preferred() { + numa::forward::keepalive_doh(upstream).await; + } + } +} + async fn cache_warm_loop(ctx: Arc, domains: Vec) { tokio::time::sleep(Duration::from_secs(2)).await; diff --git a/src/recursive.rs b/src/recursive.rs index 24d0367..2609f7f 100644 --- a/src/recursive.rs +++ b/src/recursive.rs @@ -202,23 +202,22 @@ pub(crate) fn resolve_iterative<'a>( let mut ns_idx = 0; for _ in 0..MAX_REFERRAL_DEPTH { - let ns_addr = match ns_addrs.get(ns_idx) { - Some(addr) => *addr, - None => return Err("no nameserver available".into()), - }; + if ns_idx >= ns_addrs.len() { + return Err("no nameserver available".into()); + } let (q_name, q_type) = minimize_query(qname, qtype, ¤t_zone); debug!( - "recursive: querying {} for {:?} {} (zone: {}, depth {})", - ns_addr, q_type, q_name, current_zone, referral_depth + "recursive: querying {} (+ hedge) for {:?} {} (zone: {}, depth {})", + ns_addrs[ns_idx], q_type, q_name, current_zone, referral_depth ); - let response = match send_query(q_name, q_type, ns_addr, srtt).await { + let response = match send_query_hedged(q_name, q_type, &ns_addrs[ns_idx..], srtt).await { Ok(r) => r, Err(e) => { - debug!("recursive: NS {} failed: {}", ns_addr, e); - ns_idx += 1; + debug!("recursive: NS query failed: {}", e); + ns_idx += 2; // both tried, skip past them continue; } }; @@ -228,6 +227,9 @@ pub(crate) fn resolve_iterative<'a>( { if let Some(zone) = referral_zone(&response) { current_zone = zone; + let mut cache_w = cache.write().unwrap(); + cache_ns_delegation(&mut cache_w, ¤t_zone, &response); + drop(cache_w); } let mut all_ns = extract_ns_from_records(&response.answers); if all_ns.is_empty() { @@ -296,6 +298,7 @@ pub(crate) fn resolve_iterative<'a>( { let mut cache_w = cache.write().unwrap(); + cache_ns_delegation(&mut cache_w, ¤t_zone, &response); cache_ds_from_authority(&mut cache_w, &response); } let mut new_ns_addrs = resolve_ns_addrs_from_glue(&response, &ns_names, cache); @@ -560,6 +563,23 @@ fn cache_ds_from_authority(cache: &mut DnsCache, response: &DnsPacket) { } } +/// Cache NS delegation records from a referral response so that +/// `find_closest_ns` can skip re-querying TLD servers on subsequent lookups. +fn cache_ns_delegation(cache: &mut DnsCache, zone: &str, response: &DnsPacket) { + let ns_records: Vec<_> = response + .authorities + .iter() + .filter(|r| matches!(r, DnsRecord::NS { .. })) + .cloned() + .collect(); + if ns_records.is_empty() { + return; + } + let mut pkt = make_glue_packet(); + pkt.answers = ns_records; + cache.insert(zone, QueryType::NS, &pkt); +} + fn make_glue_packet() -> DnsPacket { let mut pkt = DnsPacket::new(); pkt.header.response = true; @@ -587,6 +607,91 @@ async fn tcp_with_srtt( } } +/// Smart NS query: fire to two servers simultaneously when SRTT is unknown +/// (cold queries), or to the best server with SRTT-based hedge when known. +async fn send_query_hedged( + qname: &str, + qtype: QueryType, + servers: &[SocketAddr], + srtt: &RwLock, +) -> crate::Result { + if servers.is_empty() { + return Err("no nameserver available".into()); + } + if servers.len() == 1 { + return send_query(qname, qtype, servers[0], srtt).await; + } + + let primary = servers[0]; + let secondary = servers[1]; + let primary_known = srtt.read().unwrap().is_known(primary.ip()); + + if !primary_known { + // Cold: fire both simultaneously, first response wins + debug!( + "recursive: parallel query to {} and {} for {:?} {}", + primary, secondary, qtype, qname + ); + let fut_a = send_query(qname, qtype, primary, srtt); + let fut_b = send_query(qname, qtype, secondary, srtt); + tokio::pin!(fut_a); + tokio::pin!(fut_b); + + // First Ok wins. If one errors, wait for the other. + let mut a_done = false; + let mut b_done = false; + let mut a_err: Option = None; + let mut b_err: Option = None; + + loop { + tokio::select! { + r = &mut fut_a, if !a_done => { + match r { + Ok(resp) => return Ok(resp), + Err(e) => { a_done = true; a_err = Some(e); } + } + } + r = &mut fut_b, if !b_done => { + match r { + Ok(resp) => return Ok(resp), + Err(e) => { b_done = true; b_err = Some(e); } + } + } + } + match (a_err.take(), b_err.take()) { + (Some(e), Some(_)) => return Err(e), + (a, b) => { a_err = a; b_err = b; } + } + } + } else { + // Warm: send to best, hedge after SRTT × 3 if slow + let hedge_ms = srtt.read().unwrap().get(primary.ip()) * 3; + let hedge_delay = Duration::from_millis(hedge_ms.max(50)); + + let fut_a = send_query(qname, qtype, primary, srtt); + tokio::pin!(fut_a); + let delay = tokio::time::sleep(hedge_delay); + tokio::pin!(delay); + + tokio::select! { + r = &mut fut_a => return r, + _ = &mut delay => {} + } + + debug!( + "recursive: hedging {} -> {} after {}ms for {:?} {}", + primary, secondary, hedge_ms, qtype, qname + ); + let fut_b = send_query(qname, qtype, secondary, srtt); + tokio::pin!(fut_b); + + tokio::select! { + r = fut_a => r, + r = fut_b => r, + } + } +} + async fn send_query( qname: &str, qtype: QueryType, diff --git a/src/srtt.rs b/src/srtt.rs index f763a37..fe4df1e 100644 --- a/src/srtt.rs +++ b/src/srtt.rs @@ -45,6 +45,11 @@ impl SrttCache { } } + /// Whether we have observed RTT data for this IP. + pub fn is_known(&self, ip: IpAddr) -> bool { + self.entries.contains_key(&ip) + } + /// Apply time-based decay: each DECAY_AFTER_SECS period halves distance to INITIAL. fn decayed_srtt(entry: &SrttEntry) -> u64 { Self::decay_for_age(entry.srtt_ms, entry.updated_at.elapsed().as_secs()) diff --git a/src/wire.rs b/src/wire.rs new file mode 100644 index 0000000..6b68c3a --- /dev/null +++ b/src/wire.rs @@ -0,0 +1,1347 @@ +//! Wire-level DNS utilities: question extraction, TTL offset scanning, and patching. +//! +//! These operate directly on raw DNS wire bytes without full packet parsing, +//! enabling zero-copy forwarding and wire-level caching. + +use crate::question::QueryType; +use crate::Result; + +/// Metadata extracted from scanning a DNS response's wire bytes. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct WireMeta { + /// Byte offsets of every TTL field in answer + authority + additional sections. + /// Each offset points to the first byte of a 4-byte big-endian TTL. + /// EDNS OPT pseudo-records are excluded (their "TTL" is flags, not a real TTL). + pub ttl_offsets: Vec, + /// How many of the offsets belong to the answer section (the first `answer_count` + /// entries). Used to extract min-TTL from answers only. + pub answer_count: usize, +} + +/// Extract the first question's (domain, query type) from raw DNS wire bytes. +/// +/// Reads only the 12-byte header + first question section. Returns the lowercased +/// domain name and query type without allocating a full `DnsPacket`. +pub fn extract_question(wire: &[u8]) -> Result<(String, QueryType)> { + if wire.len() < 12 { + return Err("wire too short for DNS header".into()); + } + let qdcount = u16::from_be_bytes([wire[4], wire[5]]); + if qdcount == 0 { + return Err("no questions in wire".into()); + } + + let mut pos = 12; + let mut domain = String::with_capacity(64); + read_wire_qname(wire, &mut pos, &mut domain)?; + + if pos + 4 > wire.len() { + return Err("wire truncated in question section".into()); + } + let qtype = u16::from_be_bytes([wire[pos], wire[pos + 1]]); + // skip QTYPE(2) + QCLASS(2) + + Ok((domain, QueryType::from_num(qtype))) +} + +/// Scan a DNS response's wire bytes and return metadata about TTL field locations. +/// +/// Walks the header, skips the question section, then for each resource record in +/// answer, authority, and additional sections, records the byte offset of the TTL +/// field. EDNS OPT records (type 41 with root name) are excluded. +pub fn scan_ttl_offsets(wire: &[u8]) -> Result { + if wire.len() < 12 { + return Err("wire too short for DNS header".into()); + } + + let qdcount = u16::from_be_bytes([wire[4], wire[5]]) as usize; + let ancount = u16::from_be_bytes([wire[6], wire[7]]) as usize; + let nscount = u16::from_be_bytes([wire[8], wire[9]]) as usize; + let arcount = u16::from_be_bytes([wire[10], wire[11]]) as usize; + + let mut pos = 12; + + // Skip question section + for _ in 0..qdcount { + skip_wire_name(wire, &mut pos)?; + if pos + 4 > wire.len() { + return Err("wire truncated in question section".into()); + } + pos += 4; // QTYPE(2) + QCLASS(2) + } + + let mut ttl_offsets = Vec::new(); + + // Process answer + authority + additional sections + let section_counts = [ancount, nscount, arcount]; + let mut answer_offset_count = 0; + + for (section_idx, &count) in section_counts.iter().enumerate() { + for _ in 0..count { + // Check if this is an OPT record: root name (0x00) + type 41 + let is_opt = pos < wire.len() + && wire[pos] == 0x00 + && pos + 3 <= wire.len() + && u16::from_be_bytes([wire[pos + 1], wire[pos + 2]]) == 41; + + // Skip name + skip_wire_name(wire, &mut pos)?; + + if pos + 10 > wire.len() { + return Err("wire truncated in resource record".into()); + } + + // TYPE(2) + CLASS(2) = 4 bytes before TTL + let ttl_offset = pos + 4; + + if !is_opt { + ttl_offsets.push(ttl_offset); + if section_idx == 0 { + answer_offset_count += 1; + } + } + + // Skip TYPE(2) + CLASS(2) + TTL(4) + RDLENGTH(2) = 10 bytes + let rdlength = u16::from_be_bytes([wire[pos + 8], wire[pos + 9]]) as usize; + pos += 10 + rdlength; + + if pos > wire.len() { + return Err("wire truncated in resource record RDATA".into()); + } + } + } + + Ok(WireMeta { + ttl_offsets, + answer_count: answer_offset_count, + }) +} + +/// Extract the minimum TTL from the answer section offsets of a wire response. +pub fn min_ttl_from_wire(wire: &[u8], meta: &WireMeta) -> Option { + meta.ttl_offsets + .iter() + .take(meta.answer_count) + .filter_map(|&off| { + if off + 4 <= wire.len() { + Some(u32::from_be_bytes([ + wire[off], + wire[off + 1], + wire[off + 2], + wire[off + 3], + ])) + } else { + None + } + }) + .min() +} + +/// Patch the transaction ID (bytes 0..2) in a DNS wire message. +pub fn patch_id(wire: &mut [u8], new_id: u16) { + let bytes = new_id.to_be_bytes(); + wire[0] = bytes[0]; + wire[1] = bytes[1]; +} + +/// Patch all TTL fields at the given offsets to `new_ttl`. +pub fn patch_ttls(wire: &mut [u8], offsets: &[usize], new_ttl: u32) { + let bytes = new_ttl.to_be_bytes(); + for &off in offsets { + wire[off] = bytes[0]; + wire[off + 1] = bytes[1]; + wire[off + 2] = bytes[2]; + wire[off + 3] = bytes[3]; + } +} + +/// Read a DNS name from wire bytes at `pos`, handling compression pointers. +/// Advances `pos` past the name as it appears at the current position +/// (compression pointer targets do NOT advance `pos`). +fn read_wire_qname(wire: &[u8], pos: &mut usize, out: &mut String) -> Result<()> { + let mut jumped = false; + let mut read_pos = *pos; + let mut jumps = 0; + let max_jumps = 20; + + loop { + if read_pos >= wire.len() { + return Err("wire truncated reading name".into()); + } + let len = wire[read_pos] as usize; + + // Compression pointer: top 2 bits set + if len & 0xC0 == 0xC0 { + if read_pos + 1 >= wire.len() { + return Err("wire truncated in compression pointer".into()); + } + if !jumped { + *pos = read_pos + 2; // advance past the pointer + } + let offset = ((len & 0x3F) << 8) | wire[read_pos + 1] as usize; + read_pos = offset; + jumped = true; + jumps += 1; + if jumps > max_jumps { + return Err("too many compression jumps".into()); + } + continue; + } + + if len == 0 { + if !jumped { + *pos = read_pos + 1; + } + break; + } + + if read_pos + 1 + len > wire.len() { + return Err("wire truncated in name label".into()); + } + + if !out.is_empty() { + out.push('.'); + } + for &b in &wire[read_pos + 1..read_pos + 1 + len] { + out.push(b.to_ascii_lowercase() as char); + } + read_pos += 1 + len; + } + + Ok(()) +} + +/// Skip a DNS name in wire bytes, advancing `pos` past it. +fn skip_wire_name(wire: &[u8], pos: &mut usize) -> Result<()> { + loop { + if *pos >= wire.len() { + return Err("wire truncated skipping name".into()); + } + let len = wire[*pos] as usize; + + if len & 0xC0 == 0xC0 { + *pos += 2; // compression pointer is 2 bytes + return Ok(()); + } + if len == 0 { + *pos += 1; + return Ok(()); + } + *pos += 1 + len; + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::buffer::BytePacketBuffer; + use crate::cache::{DnsCache, DnssecStatus}; + use crate::header::ResultCode; + use crate::packet::{DnsPacket, EdnsOpt}; + use crate::question::DnsQuestion; + use crate::record::DnsRecord; + + // ── Helpers ────────────────────────────────────────────────────── + + /// Serialize a DnsPacket to wire bytes. + fn to_wire(pkt: &DnsPacket) -> Vec { + let mut buf = BytePacketBuffer::new(); + pkt.write(&mut buf).unwrap(); + buf.filled().to_vec() + } + + /// Build a minimal response with given answers. + fn response(id: u16, domain: &str, answers: Vec) -> DnsPacket { + let mut pkt = DnsPacket::new(); + pkt.header.id = id; + pkt.header.response = true; + pkt.header.recursion_desired = true; + pkt.header.recursion_available = true; + pkt.header.rescode = ResultCode::NOERROR; + pkt.questions + .push(DnsQuestion::new(domain.to_string(), QueryType::A)); + pkt.answers = answers; + pkt + } + + fn a_record(domain: &str, ip: &str, ttl: u32) -> DnsRecord { + DnsRecord::A { + domain: domain.into(), + addr: ip.parse().unwrap(), + ttl, + } + } + + fn aaaa_record(domain: &str, ip: &str, ttl: u32) -> DnsRecord { + DnsRecord::AAAA { + domain: domain.into(), + addr: ip.parse().unwrap(), + ttl, + } + } + + fn cname_record(domain: &str, host: &str, ttl: u32) -> DnsRecord { + DnsRecord::CNAME { + domain: domain.into(), + host: host.into(), + ttl, + } + } + + fn ns_record(domain: &str, host: &str, ttl: u32) -> DnsRecord { + DnsRecord::NS { + domain: domain.into(), + host: host.into(), + ttl, + } + } + + fn mx_record(domain: &str, host: &str, priority: u16, ttl: u32) -> DnsRecord { + DnsRecord::MX { + domain: domain.into(), + priority, + host: host.into(), + ttl, + } + } + + // ── A. TTL offset extraction ──────────────────────────────────── + + #[test] + fn scan_single_a_record() { + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + assert_eq!(meta.ttl_offsets.len(), 1); + assert_eq!(meta.answer_count, 1); + + let off = meta.ttl_offsets[0]; + let ttl = u32::from_be_bytes([wire[off], wire[off + 1], wire[off + 2], wire[off + 3]]); + assert_eq!(ttl, 300); + } + + #[test] + fn scan_multiple_a_records() { + let pkt = response( + 0x1234, + "example.com", + vec![ + a_record("example.com", "1.2.3.4", 300), + a_record("example.com", "5.6.7.8", 600), + a_record("example.com", "9.10.11.12", 120), + ], + ); + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + assert_eq!(meta.ttl_offsets.len(), 3); + assert_eq!(meta.answer_count, 3); + + let ttls: Vec = meta + .ttl_offsets + .iter() + .map(|&off| u32::from_be_bytes([wire[off], wire[off + 1], wire[off + 2], wire[off + 3]])) + .collect(); + assert_eq!(ttls, vec![300, 600, 120]); + } + + #[test] + fn scan_mixed_sections() { + let mut pkt = + response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + pkt.authorities + .push(ns_record("example.com", "ns1.example.com", 3600)); + pkt.authorities + .push(ns_record("example.com", "ns2.example.com", 3600)); + pkt.resources + .push(a_record("ns1.example.com", "10.0.0.1", 1800)); + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + assert_eq!(meta.ttl_offsets.len(), 4); // 1 answer + 2 authority + 1 additional + assert_eq!(meta.answer_count, 1); + } + + #[test] + fn scan_cname_chain() { + let pkt = response( + 0x1234, + "www.example.com", + vec![ + cname_record("www.example.com", "example.com", 300), + a_record("example.com", "1.2.3.4", 600), + ], + ); + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + assert_eq!(meta.ttl_offsets.len(), 2); + assert_eq!(meta.answer_count, 2); + + let ttls: Vec = meta + .ttl_offsets + .iter() + .map(|&off| u32::from_be_bytes([wire[off], wire[off + 1], wire[off + 2], wire[off + 3]])) + .collect(); + assert_eq!(ttls, vec![300, 600]); + } + + #[test] + fn scan_compressed_names() { + // Build a packet with name compression (the serializer uses compression + // for repeated domain names). Two A records for the same domain will + // have the second name compressed as a pointer. + let pkt = response( + 0x1234, + "example.com", + vec![ + a_record("example.com", "1.2.3.4", 300), + a_record("example.com", "5.6.7.8", 600), + ], + ); + let wire = to_wire(&pkt); + + // Verify compression is actually present (second name should be a pointer) + // The first answer's name is at some offset, and the second should use 0xC0xx + let meta = scan_ttl_offsets(&wire).unwrap(); + assert_eq!(meta.ttl_offsets.len(), 2); + + let ttls: Vec = meta + .ttl_offsets + .iter() + .map(|&off| u32::from_be_bytes([wire[off], wire[off + 1], wire[off + 2], wire[off + 3]])) + .collect(); + assert_eq!(ttls, vec![300, 600]); + } + + #[test] + fn scan_edns_opt_excluded() { + let mut pkt = + response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + pkt.edns = Some(EdnsOpt { + udp_payload_size: 1232, + extended_rcode: 0, + version: 0, + do_bit: false, + options: vec![], + }); + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + // Only the A record's TTL, not the OPT pseudo-record's "TTL" + assert_eq!(meta.ttl_offsets.len(), 1); + assert_eq!(meta.answer_count, 1); + } + + #[test] + fn scan_rrsig_only_wire_ttl() { + let mut pkt = + response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + pkt.answers.push(DnsRecord::RRSIG { + domain: "example.com".into(), + type_covered: 1, // A + algorithm: 13, + labels: 2, + original_ttl: 9999, // must NOT appear in offsets + expiration: 1700000000, + inception: 1690000000, + key_tag: 12345, + signer_name: "example.com".into(), + signature: vec![0x01, 0x02, 0x03, 0x04], + ttl: 300, + }); + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + // 2 TTL offsets: A record + RRSIG wire TTL + assert_eq!(meta.ttl_offsets.len(), 2); + assert_eq!(meta.answer_count, 2); + + // Both wire TTLs should be 300, not 9999 + for &off in &meta.ttl_offsets { + let ttl = + u32::from_be_bytes([wire[off], wire[off + 1], wire[off + 2], wire[off + 3]]); + assert_eq!(ttl, 300); + } + + // Verify that 9999 (original_ttl) exists somewhere in the wire but is NOT in offsets + let original_ttl_bytes = 9999u32.to_be_bytes(); + let found_at = wire + .windows(4) + .position(|w| w == original_ttl_bytes) + .expect("original_ttl should be in wire"); + assert!( + !meta.ttl_offsets.contains(&found_at), + "original_ttl offset must not be in ttl_offsets" + ); + } + + #[test] + fn scan_nsec_variable_rdata() { + let mut pkt = + response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + pkt.authorities.push(DnsRecord::NSEC { + domain: "example.com".into(), + next_domain: "z.example.com".into(), + type_bitmap: vec![0x00, 0x06, 0x40, 0x01, 0x00, 0x00, 0x00, 0x03], + ttl: 1800, + }); + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + assert_eq!(meta.ttl_offsets.len(), 2); // A + NSEC + assert_eq!(meta.answer_count, 1); + + let nsec_ttl_off = meta.ttl_offsets[1]; + let ttl = u32::from_be_bytes([ + wire[nsec_ttl_off], + wire[nsec_ttl_off + 1], + wire[nsec_ttl_off + 2], + wire[nsec_ttl_off + 3], + ]); + assert_eq!(ttl, 1800); + } + + #[test] + fn scan_empty_response() { + let pkt = response(0x1234, "nxdomain.example.com", vec![]); + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + assert!(meta.ttl_offsets.is_empty()); + assert_eq!(meta.answer_count, 0); + } + + #[test] + fn scan_unknown_record_type() { + // Manually build a response with an unknown type (99) using raw wire bytes + let mut pkt = response(0x1234, "example.com", vec![]); + pkt.answers.push(DnsRecord::UNKNOWN { + domain: "example.com".into(), + qtype: 99, + data: vec![0xDE, 0xAD, 0xBE, 0xEF], + ttl: 500, + }); + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + assert_eq!(meta.ttl_offsets.len(), 1); + let off = meta.ttl_offsets[0]; + let ttl = u32::from_be_bytes([wire[off], wire[off + 1], wire[off + 2], wire[off + 3]]); + assert_eq!(ttl, 500); + } + + #[test] + fn scan_truncated_wire_returns_error() { + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let wire = to_wire(&pkt); + // Truncate mid-record + let truncated = &wire[..wire.len() - 2]; + assert!(scan_ttl_offsets(truncated).is_err()); + } + + #[test] + fn scan_too_short_for_header() { + assert!(scan_ttl_offsets(&[0u8; 5]).is_err()); + } + + #[test] + fn scan_query_packet_no_offsets() { + let pkt = DnsPacket::query(0x1234, "example.com", QueryType::A); + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + assert!(meta.ttl_offsets.is_empty()); + } + + // ── B. TTL patching ───────────────────────────────────────────── + + #[test] + fn patch_ttl_single() { + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let mut wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + patch_ttls(&mut wire, &meta.ttl_offsets, 120); + + let off = meta.ttl_offsets[0]; + assert_eq!( + u32::from_be_bytes([wire[off], wire[off + 1], wire[off + 2], wire[off + 3]]), + 120 + ); + } + + #[test] + fn patch_ttl_multiple() { + let pkt = response( + 0x1234, + "example.com", + vec![ + a_record("example.com", "1.2.3.4", 300), + a_record("example.com", "5.6.7.8", 600), + a_record("example.com", "9.10.11.12", 900), + ], + ); + let mut wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + patch_ttls(&mut wire, &meta.ttl_offsets, 42); + + for &off in &meta.ttl_offsets { + assert_eq!( + u32::from_be_bytes([wire[off], wire[off + 1], wire[off + 2], wire[off + 3]]), + 42 + ); + } + } + + #[test] + fn patch_ttl_preserves_other_bytes() { + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let original = to_wire(&pkt); + let mut patched = original.clone(); + let meta = scan_ttl_offsets(&patched).unwrap(); + + patch_ttls(&mut patched, &meta.ttl_offsets, 120); + + // Every byte outside TTL offsets should be identical + for (i, (&orig, &patc)) in original.iter().zip(patched.iter()).enumerate() { + let in_ttl = meta + .ttl_offsets + .iter() + .any(|&off| i >= off && i < off + 4); + if !in_ttl { + assert_eq!( + orig, patc, + "byte {} changed (outside TTL): orig={:#04x}, patched={:#04x}", + i, orig, patc + ); + } + } + } + + #[test] + fn patch_ttl_zero() { + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let mut wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + patch_ttls(&mut wire, &meta.ttl_offsets, 0); + + let off = meta.ttl_offsets[0]; + assert_eq!(&wire[off..off + 4], &[0, 0, 0, 0]); + } + + #[test] + fn patch_ttl_max_u32() { + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let mut wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + patch_ttls(&mut wire, &meta.ttl_offsets, u32::MAX); + + let off = meta.ttl_offsets[0]; + assert_eq!(&wire[off..off + 4], &[0xFF, 0xFF, 0xFF, 0xFF]); + } + + #[test] + fn patch_ttl_edns_untouched() { + let mut pkt = + response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + pkt.edns = Some(EdnsOpt { + udp_payload_size: 1232, + extended_rcode: 0, + version: 0, + do_bit: true, + options: vec![], + }); + let original = to_wire(&pkt); + let mut patched = original.clone(); + let meta = scan_ttl_offsets(&patched).unwrap(); + + patch_ttls(&mut patched, &meta.ttl_offsets, 42); + + // Only the A record's TTL bytes should differ; everything else + // (including the OPT "TTL" containing the DO bit) must be unchanged. + for (i, (&orig, &patc)) in original.iter().zip(patched.iter()).enumerate() { + let in_ttl = meta + .ttl_offsets + .iter() + .any(|&off| i >= off && i < off + 4); + if !in_ttl { + assert_eq!( + orig, patc, + "byte {} changed (outside TTL): orig={:#04x}, patched={:#04x}", + i, orig, patc + ); + } + } + } + + // ── C. ID patching ────────────────────────────────────────────── + + #[test] + fn patch_id_basic() { + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let mut wire = to_wire(&pkt); + + patch_id(&mut wire, 0xABCD); + assert_eq!(&wire[0..2], &[0xAB, 0xCD]); + } + + #[test] + fn patch_id_preserves_flags() { + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let original = to_wire(&pkt); + let mut patched = original.clone(); + + patch_id(&mut patched, 0x9999); + + // Bytes 2..12 (flags + counts) unchanged + assert_eq!(&original[2..12], &patched[2..12]); + } + + #[test] + fn patch_id_zero() { + let pkt = response(0xFFFF, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let mut wire = to_wire(&pkt); + + patch_id(&mut wire, 0x0000); + assert_eq!(&wire[0..2], &[0x00, 0x00]); + } + + // ── D. extract_question ───────────────────────────────────────── + + #[test] + fn extract_question_basic() { + let pkt = DnsPacket::query(0x1234, "Example.COM", QueryType::A); + let wire = to_wire(&pkt); + let (domain, qtype) = extract_question(&wire).unwrap(); + + assert_eq!(domain, "example.com"); // lowercased + assert_eq!(qtype, QueryType::A); + } + + #[test] + fn extract_question_aaaa() { + let pkt = DnsPacket::query(0x1234, "rust-lang.org", QueryType::AAAA); + let wire = to_wire(&pkt); + let (domain, qtype) = extract_question(&wire).unwrap(); + + assert_eq!(domain, "rust-lang.org"); + assert_eq!(qtype, QueryType::AAAA); + } + + #[test] + fn extract_question_too_short() { + assert!(extract_question(&[0u8; 5]).is_err()); + } + + #[test] + fn extract_question_no_questions() { + let mut wire = to_wire(&DnsPacket::query(0x1234, "example.com", QueryType::A)); + // Zero out QDCOUNT (bytes 4-5) + wire[4] = 0; + wire[5] = 0; + assert!(extract_question(&wire).is_err()); + } + + // ── E. min_ttl_from_wire ──────────────────────────────────────── + + #[test] + fn min_ttl_answers_only() { + let mut pkt = response( + 0x1234, + "example.com", + vec![ + a_record("example.com", "1.2.3.4", 300), + a_record("example.com", "5.6.7.8", 60), + ], + ); + pkt.authorities + .push(ns_record("example.com", "ns1.example.com", 10)); // lower but in authority, not answer + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + assert_eq!(min_ttl_from_wire(&wire, &meta), Some(60)); // from answers only + } + + #[test] + fn min_ttl_empty_answers() { + let pkt = response(0x1234, "example.com", vec![]); + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + assert_eq!(min_ttl_from_wire(&wire, &meta), None); + } + + // ── F. Round-trip fidelity ────────────────────────────────────── + // + // These verify that wire bytes → scan → patch → parse produces the + // same semantic content as the original packet. They test the full + // integration path that the wire-level cache will use. + + #[test] + fn round_trip_simple_a() { + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + let mut patched = wire.clone(); + patch_id(&mut patched, 0xABCD); + patch_ttls(&mut patched, &meta.ttl_offsets, 120); + + // Parse the patched wire + let mut buf = BytePacketBuffer::from_bytes(&patched); + let parsed = DnsPacket::from_buffer(&mut buf).unwrap(); + + assert_eq!(parsed.header.id, 0xABCD); + assert_eq!(parsed.answers.len(), 1); + match &parsed.answers[0] { + DnsRecord::A { domain, addr, ttl } => { + assert_eq!(domain, "example.com"); + assert_eq!(*addr, "1.2.3.4".parse::().unwrap()); + assert_eq!(*ttl, 120); + } + other => panic!("expected A record, got {:?}", other), + } + } + + #[test] + fn round_trip_edns_survives() { + let mut pkt = + response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + pkt.edns = Some(EdnsOpt { + udp_payload_size: 1232, + extended_rcode: 0, + version: 0, + do_bit: true, + options: vec![], + }); + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + let mut patched = wire.clone(); + patch_ttls(&mut patched, &meta.ttl_offsets, 42); + + let mut buf = BytePacketBuffer::from_bytes(&patched); + let parsed = DnsPacket::from_buffer(&mut buf).unwrap(); + + let edns = parsed.edns.as_ref().expect("EDNS should survive"); + assert_eq!(edns.udp_payload_size, 1232); + assert!(edns.do_bit); + } + + #[test] + fn round_trip_dnssec_full() { + let mut pkt = response( + 0x1234, + "example.com", + vec![ + a_record("example.com", "1.2.3.4", 300), + DnsRecord::RRSIG { + domain: "example.com".into(), + type_covered: 1, + algorithm: 13, + labels: 2, + original_ttl: 300, + expiration: 1700000000, + inception: 1690000000, + key_tag: 12345, + signer_name: "example.com".into(), + signature: vec![1, 2, 3, 4, 5, 6, 7, 8], + ttl: 300, + }, + ], + ); + pkt.authorities.push(DnsRecord::NSEC { + domain: "example.com".into(), + next_domain: "z.example.com".into(), + type_bitmap: vec![0x00, 0x06, 0x40, 0x01, 0x00, 0x00, 0x00, 0x03], + ttl: 300, + }); + pkt.resources.push(DnsRecord::DNSKEY { + domain: "example.com".into(), + flags: 257, + protocol: 3, + algorithm: 13, + public_key: vec![10, 20, 30, 40], + ttl: 3600, + }); + pkt.edns = Some(EdnsOpt { + udp_payload_size: 1232, + extended_rcode: 0, + version: 0, + do_bit: true, + options: vec![], + }); + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + // 4 TTL offsets: A + RRSIG (answers) + NSEC (authority) + DNSKEY (additional) + // OPT excluded + assert_eq!(meta.ttl_offsets.len(), 4); + assert_eq!(meta.answer_count, 2); + + let mut patched = wire.clone(); + patch_ttls(&mut patched, &meta.ttl_offsets, 42); + + let mut buf = BytePacketBuffer::from_bytes(&patched); + let parsed = DnsPacket::from_buffer(&mut buf).unwrap(); + + assert_eq!(parsed.answers.len(), 2); + assert_eq!(parsed.authorities.len(), 1); + assert_eq!(parsed.resources.len(), 1); + assert!(parsed.edns.is_some()); + + // All TTLs should be 42 now + for ans in &parsed.answers { + assert_eq!(ans.ttl(), 42); + } + for auth in &parsed.authorities { + assert_eq!(auth.ttl(), 42); + } + for res in &parsed.resources { + assert_eq!(res.ttl(), 42); + } + + // RRSIG original_ttl must be preserved (it's inside RDATA, not a wire TTL) + match &parsed.answers[1] { + DnsRecord::RRSIG { original_ttl, .. } => assert_eq!(*original_ttl, 300), + other => panic!("expected RRSIG, got {:?}", other), + } + } + + #[test] + fn round_trip_nxdomain_soa() { + let mut pkt = DnsPacket::new(); + pkt.header.id = 0x5678; + pkt.header.response = true; + pkt.header.rescode = ResultCode::NXDOMAIN; + pkt.questions + .push(DnsQuestion::new("missing.example.com".into(), QueryType::A)); + // SOA in authority (we don't have a SOA variant, so use NS as proxy for offset testing) + pkt.authorities + .push(ns_record("example.com", "ns1.example.com", 900)); + + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + assert_eq!(meta.ttl_offsets.len(), 1); + assert_eq!(meta.answer_count, 0); // no answers, only authority + + let mut patched = wire.clone(); + patch_id(&mut patched, 0x9999); + patch_ttls(&mut patched, &meta.ttl_offsets, 60); + + let mut buf = BytePacketBuffer::from_bytes(&patched); + let parsed = DnsPacket::from_buffer(&mut buf).unwrap(); + + assert_eq!(parsed.header.id, 0x9999); + assert_eq!(parsed.header.rescode, ResultCode::NXDOMAIN); + assert_eq!(parsed.authorities[0].ttl(), 60); + } + + #[test] + fn round_trip_mx_record() { + let pkt = response( + 0x1234, + "example.com", + vec![mx_record("example.com", "mail.example.com", 10, 3600)], + ); + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + let mut patched = wire.clone(); + patch_ttls(&mut patched, &meta.ttl_offsets, 100); + + let mut buf = BytePacketBuffer::from_bytes(&patched); + let parsed = DnsPacket::from_buffer(&mut buf).unwrap(); + + match &parsed.answers[0] { + DnsRecord::MX { + domain, + priority, + host, + ttl, + } => { + assert_eq!(domain, "example.com"); + assert_eq!(*priority, 10); + assert_eq!(host, "mail.example.com"); + assert_eq!(*ttl, 100); + } + other => panic!("expected MX, got {:?}", other), + } + } + + #[test] + fn round_trip_many_records() { + let answers: Vec = (0..20) + .map(|i| a_record("example.com", &format!("10.0.0.{}", i), 300 + i * 10)) + .collect(); + let pkt = response(0x1234, "example.com", answers); + let wire = to_wire(&pkt); + let meta = scan_ttl_offsets(&wire).unwrap(); + + assert_eq!(meta.ttl_offsets.len(), 20); + + let mut patched = wire.clone(); + patch_ttls(&mut patched, &meta.ttl_offsets, 1); + + let mut buf = BytePacketBuffer::from_bytes(&patched); + let parsed = DnsPacket::from_buffer(&mut buf).unwrap(); + + assert_eq!(parsed.answers.len(), 20); + for ans in &parsed.answers { + assert_eq!(ans.ttl(), 1); + } + } + + // ── G. Edge cases ─────────────────────────────────────────────── + + #[test] + fn scan_rejects_empty_wire() { + assert!(scan_ttl_offsets(&[]).is_err()); + } + + #[test] + fn extract_question_rejects_empty_wire() { + assert!(extract_question(&[]).is_err()); + } + + // ── H. Cache behavior tests ───────────────────────────────────── + // + // These test existing DnsCache behavior that must be preserved after + // the wire-level migration. They use the current parsed-packet API + // and serve as a regression suite. + + #[test] + fn cache_insert_lookup_hit() { + let mut cache = DnsCache::new(100, 1, 3600); + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + cache.insert("example.com", QueryType::A, &pkt); + + let (result, status) = cache + .lookup_with_status("example.com", QueryType::A) + .expect("should hit"); + assert_eq!(result.answers.len(), 1); + assert_eq!(status, DnssecStatus::Indeterminate); + } + + #[test] + fn cache_lookup_adjusts_ttl() { + let mut cache = DnsCache::new(100, 1, 3600); + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + cache.insert("example.com", QueryType::A, &pkt); + + let (result, _) = cache.lookup_with_status("example.com", QueryType::A).unwrap(); + // TTL should be <= 300 (at most original, reduced by elapsed time) + assert!(result.answers[0].ttl() <= 300); + assert!(result.answers[0].ttl() > 0); + } + + #[test] + fn cache_miss_wrong_domain() { + let mut cache = DnsCache::new(100, 1, 3600); + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + cache.insert("example.com", QueryType::A, &pkt); + + assert!(cache + .lookup_with_status("other.com", QueryType::A) + .is_none()); + } + + #[test] + fn cache_miss_wrong_qtype() { + let mut cache = DnsCache::new(100, 1, 3600); + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + cache.insert("example.com", QueryType::A, &pkt); + + assert!(cache + .lookup_with_status("example.com", QueryType::AAAA) + .is_none()); + } + + #[test] + fn cache_overwrite_no_double_count() { + let mut cache = DnsCache::new(100, 1, 3600); + let pkt1 = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt2 = response(0x5678, "example.com", vec![a_record("example.com", "5.6.7.8", 600)]); + + cache.insert("example.com", QueryType::A, &pkt1); + assert_eq!(cache.len(), 1); + + cache.insert("example.com", QueryType::A, &pkt2); + assert_eq!(cache.len(), 1); // no double count + + let (result, _) = cache.lookup_with_status("example.com", QueryType::A).unwrap(); + match &result.answers[0] { + DnsRecord::A { addr, .. } => { + assert_eq!(*addr, "5.6.7.8".parse::().unwrap()) + } + _ => panic!("expected A record"), + } + } + + #[test] + fn cache_ttl_clamped_min() { + let mut cache = DnsCache::new(100, 60, 3600); + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 5)]); + cache.insert("example.com", QueryType::A, &pkt); + + let (remaining, total) = cache.ttl_remaining("example.com", QueryType::A).unwrap(); + assert_eq!(total, 60); // clamped up from 5 + assert!(remaining <= 60); + } + + #[test] + fn cache_ttl_clamped_max() { + let mut cache = DnsCache::new(100, 1, 3600); + let pkt = + response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 999999)]); + cache.insert("example.com", QueryType::A, &pkt); + + let (_, total) = cache.ttl_remaining("example.com", QueryType::A).unwrap(); + assert_eq!(total, 3600); // clamped down from 999999 + } + + #[test] + fn cache_len_empty_clear() { + let mut cache = DnsCache::new(100, 1, 3600); + assert!(cache.is_empty()); + assert_eq!(cache.len(), 0); + + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + cache.insert("example.com", QueryType::A, &pkt); + assert!(!cache.is_empty()); + assert_eq!(cache.len(), 1); + + cache.clear(); + assert!(cache.is_empty()); + assert_eq!(cache.len(), 0); + assert!(cache.lookup("example.com", QueryType::A).is_none()); + } + + #[test] + fn cache_remove_domain() { + let mut cache = DnsCache::new(100, 1, 3600); + let pkt_a = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt_aaaa = response( + 0x5678, + "example.com", + vec![aaaa_record("example.com", "::1", 300)], + ); + cache.insert("example.com", QueryType::A, &pkt_a); + cache.insert("example.com", QueryType::AAAA, &pkt_aaaa); + assert_eq!(cache.len(), 2); + + cache.remove("example.com"); + assert_eq!(cache.len(), 0); + assert!(cache.lookup("example.com", QueryType::A).is_none()); + assert!(cache.lookup("example.com", QueryType::AAAA).is_none()); + } + + #[test] + fn cache_list_entries() { + let mut cache = DnsCache::new(100, 1, 3600); + let pkt_a = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt_b = response(0x5678, "test.org", vec![a_record("test.org", "5.6.7.8", 600)]); + cache.insert("example.com", QueryType::A, &pkt_a); + cache.insert("test.org", QueryType::A, &pkt_b); + + let list = cache.list(); + assert_eq!(list.len(), 2); + let domains: Vec<&str> = list.iter().map(|e| e.domain.as_str()).collect(); + assert!(domains.contains(&"example.com")); + assert!(domains.contains(&"test.org")); + } + + #[test] + fn cache_heap_bytes_grows() { + let mut cache = DnsCache::new(100, 1, 3600); + let empty = cache.heap_bytes(); + + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + cache.insert("example.com", QueryType::A, &pkt); + assert!(cache.heap_bytes() > empty); + } + + #[test] + fn cache_needs_warm_behavior() { + let mut cache = DnsCache::new(100, 1, 3600); + + // Missing → needs warm + assert!(cache.needs_warm("example.com")); + + // Both A and AAAA cached → does not need warm + let pkt_a = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt_aaaa = response( + 0x5678, + "example.com", + vec![aaaa_record("example.com", "::1", 300)], + ); + cache.insert("example.com", QueryType::A, &pkt_a); + cache.insert("example.com", QueryType::AAAA, &pkt_aaaa); + assert!(!cache.needs_warm("example.com")); + + // Only A cached → needs warm (AAAA missing) + cache.remove("example.com"); + cache.insert("example.com", QueryType::A, &pkt_a); + assert!(cache.needs_warm("example.com")); + } + + #[test] + fn cache_ttl_remaining_api() { + let mut cache = DnsCache::new(100, 60, 3600); + assert!(cache.ttl_remaining("missing.com", QueryType::A).is_none()); + + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + cache.insert("example.com", QueryType::A, &pkt); + let (remaining, total) = cache.ttl_remaining("example.com", QueryType::A).unwrap(); + assert_eq!(total, 300); + assert!(remaining > 0); + assert!(remaining <= 300); + } + + #[test] + fn cache_dnssec_status_preserved() { + let mut cache = DnsCache::new(100, 1, 3600); + let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + cache.insert_with_status("example.com", QueryType::A, &pkt, DnssecStatus::Secure); + + let (_, status) = cache + .lookup_with_status("example.com", QueryType::A) + .unwrap(); + assert_eq!(status, DnssecStatus::Secure); + } + + // ── I. Memory footprint baseline ────────────────────────────── + // + // Measures the current parsed-packet cache memory vs what wire-level + // storage would cost for the same entries. This is a baseline — after + // migration, re-run to verify improvement. + + #[test] + fn memory_footprint_baseline() { + let mut cache = DnsCache::new(1000, 1, 3600); + + // Simulate a realistic cache: 50 domains, mix of record types + let domains: Vec = (0..50).map(|i| format!("domain{}.example.com", i)).collect(); + + let mut total_wire_bytes = 0usize; + let mut total_wire_meta_bytes = 0usize; + + for (i, domain) in domains.iter().enumerate() { + // A record + let pkt_a = response( + i as u16, + domain, + vec![ + a_record(domain, &format!("10.0.{}.1", i % 256), 300), + a_record(domain, &format!("10.0.{}.2", i % 256), 300), + ], + ); + cache.insert(domain, QueryType::A, &pkt_a); + + let wire_a = to_wire(&pkt_a); + let meta_a = scan_ttl_offsets(&wire_a).unwrap(); + total_wire_bytes += wire_a.len(); + total_wire_meta_bytes += meta_a.ttl_offsets.len() * std::mem::size_of::(); + + // AAAA record for half of them + if i % 2 == 0 { + let pkt_aaaa = response( + (i + 1000) as u16, + domain, + vec![aaaa_record(domain, &format!("2001:db8::{:x}", i), 600)], + ); + cache.insert(domain, QueryType::AAAA, &pkt_aaaa); + + let wire_aaaa = to_wire(&pkt_aaaa); + let meta_aaaa = scan_ttl_offsets(&wire_aaaa).unwrap(); + total_wire_bytes += wire_aaaa.len(); + total_wire_meta_bytes += + meta_aaaa.ttl_offsets.len() * std::mem::size_of::(); + } + } + + // Compare only the variable per-entry data (what actually differs + // between parsed and wire storage). HashMap overhead, domain keys, + // Instant, Duration, DnssecStatus are identical in both approaches. + let mut parsed_data_bytes = 0usize; + // Re-insert and measure just packet.heap_bytes() per entry + { + let mut cache2 = DnsCache::new(1000, 1, 3600); + for (i, domain) in domains.iter().enumerate() { + let pkt_a = response( + i as u16, + domain, + vec![ + a_record(domain, &format!("10.0.{}.1", i % 256), 300), + a_record(domain, &format!("10.0.{}.2", i % 256), 300), + ], + ); + parsed_data_bytes += pkt_a.heap_bytes(); + cache2.insert(domain, QueryType::A, &pkt_a); + + if i % 2 == 0 { + let pkt_aaaa = response( + (i + 1000) as u16, + domain, + vec![aaaa_record(domain, &format!("2001:db8::{:x}", i), 600)], + ); + parsed_data_bytes += pkt_aaaa.heap_bytes(); + cache2.insert(domain, QueryType::AAAA, &pkt_aaaa); + } + } + } + + let wire_total = total_wire_bytes + total_wire_meta_bytes; + let entry_count = cache.len(); + + // Also measure the struct size difference per entry + let parsed_struct = std::mem::size_of::(); + let wire_struct = std::mem::size_of::>() + std::mem::size_of::>() + std::mem::size_of::(); // wire + offsets + answer_count + + println!(); + println!("=== Cache Memory Footprint Baseline ({} entries) ===", entry_count); + println!(); + println!("Variable data (heap, per-entry payload):"); + println!(" Parsed (packet.heap_bytes): {} bytes ({:.1}/entry)", parsed_data_bytes, parsed_data_bytes as f64 / entry_count as f64); + println!(" Wire (bytes + TTL offsets): {} bytes ({:.1}/entry)", wire_total, wire_total as f64 / entry_count as f64); + println!(" Ratio: {:.1}x smaller with wire", parsed_data_bytes as f64 / wire_total as f64); + println!(); + println!("Struct overhead (stack, per entry):"); + println!(" DnsPacket: {} bytes", parsed_struct); + println!(" Wire (Vec+Vec+usize): {} bytes", wire_struct); + println!(); + println!("Total per-entry (struct + avg heap):"); + let parsed_total_per = parsed_struct as f64 + parsed_data_bytes as f64 / entry_count as f64; + let wire_total_per = wire_struct as f64 + wire_total as f64 / entry_count as f64; + println!(" Parsed: {:.0} bytes", parsed_total_per); + println!(" Wire: {:.0} bytes", wire_total_per); + println!(" Ratio: {:.1}x smaller with wire", parsed_total_per / wire_total_per); + println!(); + + // Assertions + assert!( + wire_total < parsed_data_bytes, + "wire data ({wire_total}) should be smaller than parsed data ({parsed_data_bytes})" + ); + } + + #[test] + fn cache_max_entries_cap() { + let mut cache = DnsCache::new(2, 1, 3600); + for i in 0..3 { + let domain = format!("test{}.com", i); + let pkt = response( + i as u16, + &domain, + vec![a_record(&domain, &format!("1.2.3.{}", i), 3600)], + ); + cache.insert(&domain, QueryType::A, &pkt); + } + // Should not exceed max (third insert is silently dropped or evicts) + assert!(cache.len() <= 2); + } +} diff --git a/tests/integration.sh b/tests/integration.sh index 92da878..c70ec59 100755 --- a/tests/integration.sh +++ b/tests/integration.sh @@ -53,7 +53,17 @@ CONF echo "Starting Numa on :$PORT ($SUITE_NAME)..." RUST_LOG=info "$BINARY" "$CONFIG" > "$LOG" 2>&1 & NUMA_PID=$! - sleep 4 + sleep 2 + + # Wait for blocklist to load (if blocking is enabled in this suite) + if echo "$SUITE_CONFIG" | grep -q 'enabled = true'; then + for i in $(seq 1 20); do + LOADED=$(curl -sf http://127.0.0.1:$API_PORT/blocking/stats 2>/dev/null \ + | grep -o '"domains_loaded":[0-9]*' | cut -d: -f2) + if [ "${LOADED:-0}" -gt 0 ]; then break; fi + sleep 1 + done + fi if ! kill -0 "$NUMA_PID" 2>/dev/null; then echo "Failed to start Numa:" -- 2.34.1 From 5d9a3a809b4bf7e85b3243efa69293cf2f0e399f Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 06:22:42 +0300 Subject: [PATCH 014/139] feat: DoT client, recursive optimization, bench refactor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add DoT forwarding client (tls://IP#hostname upstream config) - Recursive: cache NS delegations, serve-stale (RFC 8767), parallel NS queries on cold, no TCP fallback on individual UDP timeouts, 400ms NS/TCP timeout (down from 800/1500ms) - Reduce recursive p99 from 2367ms to 402ms (vs Unbound's 148ms) - Refactor benchmark suite: generic compare_two engine, delete one-off diagnostics (1969 → 750 lines) - Code cleanup: forward_query delegates to _raw, Option for tls_name, saturating_sub for ns_idx --- Cargo.lock | 2 +- benches/numa-bench.toml | 10 +- benches/recursive_compare.rs | 2060 ++++++++++++---------------------- src/forward.rs | 53 +- src/recursive.rs | 29 +- 5 files changed, 754 insertions(+), 1400 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eaba214..c0f7692 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1358,7 +1358,7 @@ dependencies = [ "tokio-rustls", "toml", "tower", - "webpki-roots", + "webpki-roots 1.0.6", ] [[package]] diff --git a/benches/numa-bench.toml b/benches/numa-bench.toml index 0e058af..6124840 100644 --- a/benches/numa-bench.toml +++ b/benches/numa-bench.toml @@ -5,7 +5,8 @@ api_bind_addr = "127.0.0.1" data_dir = "/tmp/numa-bench" [upstream] -mode = "recursive" +mode = "forward" +address = ["https://9.9.9.9/dns-query"] timeout_ms = 10000 [cache] @@ -15,8 +16,13 @@ max_ttl = 3600 [blocking] enabled = false +[proxy] +port = 8080 +tls_port = 8443 + [dot] -enabled = false +enabled = true +port = 8530 [mobile] enabled = false diff --git a/benches/recursive_compare.rs b/benches/recursive_compare.rs index e35768c..12f3689 100644 --- a/benches/recursive_compare.rs +++ b/benches/recursive_compare.rs @@ -1,20 +1,18 @@ -//! DoH forwarding benchmark: Numa vs hickory-resolver. +//! DNS forwarding benchmark suite. //! -//! Both forward to the same DoH upstream (Quad9). -//! Measures end-to-end resolution time through each implementation. -//! -//! Fairness: -//! - Both reuse a single TLS connection (Numa via persistent server, -//! Hickory via a shared resolver instance with cache_size=0). -//! - Measurement order is alternated each round to cancel order bias. -//! - Numa cache is flushed before each query. -//! - 100 domains × 10 rounds for statistical confidence. +//! Modes: +//! (default) Numa server (UDP) vs Hickory library (DoH) — the original benchmark +//! --diag Hickory connection reuse diagnostic (20 queries) +//! --diag-clients Per-query reqwest vs Hickory timing (20 queries) +//! --direct Library-to-library: Numa forward_query_raw vs Hickory resolver.lookup +//! --hedge-5x Hedging: single vs hedge-same vs hedge-dual vs Hickory (5 iterations) +//! --vs-unbound Server-to-server: Numa vs Unbound (plain UDP, caching) +//! --vs-dot DoT server: Numa vs Unbound +//! --vs-doh-servers DoH server: Numa vs Unbound (DoT upstream) //! //! Setup: -//! 1. Start a bench Numa instance: -//! cargo run -- benches/numa-bench.toml -//! 2. Run: -//! cargo bench --bench recursive_compare +//! 1. Start a bench Numa instance: cargo run -- benches/numa-bench.toml +//! 2. Run: cargo bench --bench recursive_compare [-- --mode] use std::net::SocketAddr; use std::time::{Duration, Instant}; @@ -130,216 +128,585 @@ const DOMAINS: &[&str] = &[ const ROUNDS: usize = 10; fn main() { - let diag = std::env::args().any(|a| a == "--diag"); - let direct = std::env::args().any(|a| a == "--direct"); + let arg = |flag: &str| std::env::args().any(|a| a == flag); let rt = tokio::runtime::Runtime::new().unwrap(); - if diag { - run_diag(&rt); - return; + if arg("--diag") { + return run_diag(&rt); + } + if arg("--diag-clients") { + return run_diag_clients(&rt); + } + if arg("--direct") { + return run_direct(&rt); + } + if arg("--hedge-5x") { + return run_hedge_multi(&rt, 5); + } + if arg("--vs-unbound") { + return run_server_comparison(&rt, "Unbound", "127.0.0.1:5456", 5); + } + if arg("--vs-dnscrypt") { + return run_server_comparison(&rt, "dnscrypt-proxy", "127.0.0.1:5455", 5); + } + if arg("--vs-dot") { + return run_dot_comparison(&rt, 5); + } + if arg("--vs-doh-servers") { + return run_doh_comparison(&rt, 5); } - if direct { - run_direct(&rt); - return; + // Default: Numa server (UDP) vs Hickory library (DoH) + run_default(&rt); +} + +// ── Generic 2-way comparison engine ───────────────────────────── + +fn compare_two( + rt: &tokio::runtime::Runtime, + title: &str, + name_a: &str, + name_b: &str, + measure_a: &dyn Fn(&str) -> f64, + measure_b: &dyn Fn(&str) -> f64, + iterations: usize, +) { + let flush = std::env::args().any(|a| a == "--flush"); + println!("{}", title); + println!( + "{} domains × {} rounds × {} iterations\n", + DOMAINS.len(), + ROUNDS, + iterations + ); + + let mut all_a = Vec::new(); + let mut all_b = Vec::new(); + let mut iter_stats: Vec<[(f64, f64, f64, f64, f64); 2]> = Vec::new(); + + for iter in 1..=iterations { + println!(" iteration {}/{}...", iter, iterations); + let mut a = Vec::new(); + let mut b = Vec::new(); + + for domain in DOMAINS { + for round in 0..ROUNDS { + if flush { + flush_cache(); + std::thread::sleep(Duration::from_millis(5)); + } + if round % 2 == 0 { + a.push(measure_a(domain)); + b.push(measure_b(domain)); + } else { + b.push(measure_b(domain)); + a.push(measure_a(domain)); + } + } + } + + iter_stats.push([stats(&mut a), stats(&mut b)]); + all_a.extend_from_slice(&a); + all_b.extend_from_slice(&b); } - if std::env::args().any(|a| a == "--diag-clients") { - run_diag_clients(&rt); - return; + print_results( + name_a, + name_b, + &iter_stats, + &mut all_a, + &mut all_b, + iterations, + ); +} + +fn print_results( + name_a: &str, + name_b: &str, + iter_stats: &[[(f64, f64, f64, f64, f64); 2]], + all_a: &mut Vec, + all_b: &mut Vec, + iterations: usize, +) { + let w = name_a.len().max(name_b.len()).max(6); + + println!("\n=== Per-iteration medians ==="); + println!("{:<8} {:>w$} {:>w$}", "iter", name_a, name_b, w = w + 3); + for (i, s) in iter_stats.iter().enumerate() { + println!( + "{:<8} {:>w$.1} ms {:>w$.1} ms", + i + 1, + s[0].1, + s[1].1, + w = w + ); } - if std::env::args().any(|a| a == "--spike-trace") { - run_spike_trace(&rt); - return; + println!("\n=== Per-iteration p99 ==="); + println!("{:<8} {:>w$} {:>w$}", "iter", name_a, name_b, w = w + 3); + for (i, s) in iter_stats.iter().enumerate() { + println!( + "{:<8} {:>w$.1} ms {:>w$.1} ms", + i + 1, + s[0].3, + s[1].3, + w = w + ); } - if std::env::args().any(|a| a == "--spike-phases") { - run_spike_phases(&rt); - return; - } + let (a_m, a_med, a_p95, a_p99, a_sd) = stats(all_a); + let (b_m, b_med, b_p95, b_p99, b_sd) = stats(all_b); - if std::env::args().any(|a| a == "--spike-heartbeat") { - run_spike_heartbeat(&rt); - return; - } + let total = iterations * DOMAINS.len() * ROUNDS; + println!("\n=== Aggregated ({} samples per method) ===\n", total); + println!("{:<10} {:>w$} {:>w$}", "", name_a, name_b, w = w + 3); + println!("{:<10} {:>w$.1} ms {:>w$.1} ms", "mean", a_m, b_m, w = w); + println!( + "{:<10} {:>w$.1} ms {:>w$.1} ms", + "median", + a_med, + b_med, + w = w + ); + println!( + "{:<10} {:>w$.1} ms {:>w$.1} ms", + "p95", + a_p95, + b_p95, + w = w + ); + println!( + "{:<10} {:>w$.1} ms {:>w$.1} ms", + "p99", + a_p99, + b_p99, + w = w + ); + println!("{:<10} {:>w$.1} ms {:>w$.1} ms", "σ", a_sd, b_sd, w = w); - if std::env::args().any(|a| a == "--hedge") { - run_hedge(&rt); - return; - } + let pct = |a: f64, b: f64| { + if b.abs() > 0.001 { + (a - b) / b * 100.0 + } else { + 0.0 + } + }; + println!("\n{} vs {}:", name_a, name_b); + println!(" mean: {:+.1} ms ({:+.0}%)", a_m - b_m, pct(a_m, b_m)); + println!( + " median: {:+.1} ms ({:+.0}%)", + a_med - b_med, + pct(a_med, b_med) + ); + println!( + " p99: {:+.1} ms ({:+.0}%)", + a_p99 - b_p99, + pct(a_p99, b_p99) + ); +} - if std::env::args().any(|a| a == "--hedge-5x") { - run_hedge_multi(&rt, 5); - return; - } - - if std::env::args().any(|a| a == "--vs-dnscrypt") { - run_vs_dnscrypt(&rt, 5); - return; - } - - if std::env::args().any(|a| a == "--vs-unbound") { - run_vs_unbound(&rt, 5); - return; - } +// ── Modes ─────────────────────────────────────────────────────── +/// Default: Numa server (UDP) vs Hickory library (DoH), cache flushed. +fn run_default(rt: &tokio::runtime::Runtime) { let numa_addr: SocketAddr = NUMA_BENCH.parse().unwrap(); - - println!("DoH Forwarding Benchmark: Numa vs hickory-resolver"); - println!("Both forwarding to {DOH_UPSTREAM}"); - println!("{} domains × {ROUNDS} rounds", DOMAINS.len()); - println!(); - - // Verify bench Numa is reachable if rt.block_on(query_udp(numa_addr, "example.com")).is_none() { eprintln!("Bench Numa not responding on {numa_addr}"); - eprintln!(); - eprintln!("Start it with:"); - eprintln!(" cargo run -- benches/numa-bench.toml"); + eprintln!("Start with: cargo run -- benches/numa-bench.toml"); std::process::exit(1); } - // Build a shared Hickory resolver (reuses TLS connection, like Numa does) let resolver = rt.block_on(build_hickory_resolver()); - // Warm up both paths (TLS handshake, connection establishment) - println!("Warming up connections..."); + println!("Warming up..."); for _ in 0..3 { rt.block_on(query_udp(numa_addr, "example.com")); rt.block_on(query_hickory_doh(&resolver, "example.com")); } flush_cache(); - println!( - "{:<30} {:>10} {:>10} {:>10} {:>8} {:>8}", - "Domain", "Numa (ms)", "Hickory", "Delta", "σ Numa", "σ Hick" - ); - println!("{}", "-".repeat(92)); - - let mut numa_all = Vec::new(); - let mut hickory_all = Vec::new(); - let mut per_domain: Vec<(&str, f64, f64, f64, f64, f64)> = Vec::new(); - - for domain in DOMAINS { - let mut numa_times = Vec::with_capacity(ROUNDS); - let mut hickory_times = Vec::with_capacity(ROUNDS); - - for round in 0..ROUNDS { + compare_two( + rt, + &format!("DoH Forwarding: Numa server vs Hickory library\nBoth → {DOH_UPSTREAM}"), + "Numa", + "Hickory", + &|domain| { flush_cache(); std::thread::sleep(Duration::from_millis(10)); + let t = Instant::now(); + let _ = rt.block_on(query_udp(numa_addr, domain)); + t.elapsed().as_secs_f64() * 1000.0 + }, + &|domain| { + let t = Instant::now(); + let _ = rt.block_on(query_hickory_doh(&resolver, domain)); + t.elapsed().as_secs_f64() * 1000.0 + }, + 1, + ); +} - // Alternate measurement order each round to cancel systematic bias - if round % 2 == 0 { - // Numa first - let t = measure(&rt, || rt.block_on(query_udp(numa_addr, domain))); - numa_times.push(t); - let t = measure(&rt, || rt.block_on(query_hickory_doh(&resolver, domain))); - hickory_times.push(t); - } else { - // Hickory first - let t = measure(&rt, || rt.block_on(query_hickory_doh(&resolver, domain))); - hickory_times.push(t); - flush_cache(); - std::thread::sleep(Duration::from_millis(10)); - let t = measure(&rt, || rt.block_on(query_udp(numa_addr, domain))); - numa_times.push(t); +/// Library-to-library: Numa forward_query_raw vs Hickory resolver.lookup. +fn run_direct(rt: &tokio::runtime::Runtime) { + let upstream = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse"); + let resolver = rt.block_on(build_hickory_resolver()); + let timeout = Duration::from_secs(10); + + println!("Warming up..."); + for _ in 0..3 { + let w = build_query_vec("example.com"); + let _ = rt.block_on(numa::forward::forward_query_raw(&w, &upstream, timeout)); + let _ = rt.block_on(query_hickory_doh(&resolver, "example.com")); + } + + compare_two( + rt, + &format!("Direct DoH: Numa forward_query_raw vs Hickory resolver.lookup\nBoth → {DOH_UPSTREAM}, no server pipeline"), + "Numa", "Hickory", + &|domain| { + let w = build_query_vec(domain); + let t = Instant::now(); + let _ = rt.block_on(numa::forward::forward_query_raw(&w, &upstream, timeout)); + t.elapsed().as_secs_f64() * 1000.0 + }, + &|domain| { + let t = Instant::now(); + let _ = rt.block_on(query_hickory_doh(&resolver, domain)); + t.elapsed().as_secs_f64() * 1000.0 + }, + 5, + ); +} + +/// Server-to-server: Numa vs another server, both on plain UDP. +fn run_server_comparison( + rt: &tokio::runtime::Runtime, + other_name: &str, + other_addr: &str, + iterations: usize, +) { + let numa_addr: SocketAddr = NUMA_BENCH.parse().unwrap(); + let other: SocketAddr = other_addr.parse().unwrap(); + + for (name, addr) in [("Numa", numa_addr), (other_name, other)] { + if rt.block_on(query_udp(addr, "example.com")).is_none() { + eprintln!("{name} not responding on {addr}"); + std::process::exit(1); + } + } + + println!("Warming up..."); + for _ in 0..5 { + let _ = rt.block_on(query_udp(numa_addr, "example.com")); + let _ = rt.block_on(query_udp(other, "example.com")); + } + + compare_two( + rt, + &format!("Server-to-Server: Numa vs {other_name} (UDP, caching)"), + "Numa", + other_name, + &|domain| { + let t = Instant::now(); + let _ = rt.block_on(query_udp(numa_addr, domain)); + t.elapsed().as_secs_f64() * 1000.0 + }, + &|domain| { + let t = Instant::now(); + let _ = rt.block_on(query_udp(other, domain)); + t.elapsed().as_secs_f64() * 1000.0 + }, + iterations, + ); +} + +/// DoT server comparison: Numa vs Unbound. +fn run_dot_comparison(rt: &tokio::runtime::Runtime, iterations: usize) { + const NUMA_DOT: &str = "127.0.0.1:8530"; + const UNBOUND_DOT: &str = "127.0.0.1:8531"; + + let _ = rustls::crypto::ring::default_provider().install_default(); + let tls_config = build_insecure_tls_config(); + + for (name, addr) in [("Numa", NUMA_DOT), ("Unbound", UNBOUND_DOT)] { + match rt.block_on(query_dot_once(addr, "example.com", &tls_config)) { + Ok(_) => println!("{name} DoT: OK"), + Err(e) => { + eprintln!("{name} DoT not responding on {addr}: {e}"); + std::process::exit(1); + } + } + } + + println!("Warming up..."); + for _ in 0..3 { + let _ = rt.block_on(query_dot_once(NUMA_DOT, "example.com", &tls_config)); + let _ = rt.block_on(query_dot_once(UNBOUND_DOT, "example.com", &tls_config)); + } + + compare_two( + rt, + "DoT Server: Numa vs Unbound (both DoT→clients, forwarding to Quad9)", + "Numa", + "Unbound", + &|domain| { + let t = Instant::now(); + let _ = rt.block_on(query_dot_once(NUMA_DOT, domain, &tls_config)); + t.elapsed().as_secs_f64() * 1000.0 + }, + &|domain| { + let t = Instant::now(); + let _ = rt.block_on(query_dot_once(UNBOUND_DOT, domain, &tls_config)); + t.elapsed().as_secs_f64() * 1000.0 + }, + iterations, + ); +} + +/// DoH server comparison: Numa vs Unbound (both DoH→clients, DoT upstream). +fn run_doh_comparison(rt: &tokio::runtime::Runtime, iterations: usize) { + const NUMA_DOH: &str = "https://127.0.0.1:8443/dns-query"; + const UNBOUND_DOH: &str = "https://127.0.0.1:8445/dns-query"; + + let client = reqwest::Client::builder() + .use_rustls_tls() + .danger_accept_invalid_certs(true) + .http2_initial_stream_window_size(65_535) + .http2_initial_connection_window_size(65_535) + .pool_idle_timeout(Duration::from_secs(300)) + .build() + .unwrap(); + + for (name, url, host) in [ + ("Numa", NUMA_DOH, Some("numa.numa")), + ("Unbound", UNBOUND_DOH, None), + ] { + let w = build_query_vec("example.com"); + match rt.block_on(query_doh_server(&client, url, &w, host)) { + Ok(_) => println!("{name} DoH: OK"), + Err(e) => { + eprintln!("{name} DoH not responding: {e}"); + std::process::exit(1); + } + } + } + + println!("Warming up..."); + for _ in 0..5 { + let w = build_query_vec("example.com"); + let _ = rt.block_on(query_doh_server(&client, NUMA_DOH, &w, Some("numa.numa"))); + let _ = rt.block_on(query_doh_server(&client, UNBOUND_DOH, &w, None)); + } + + compare_two( + rt, + "DoH Server: Numa vs Unbound (both DoH→clients, DoT upstream)", + "Numa", + "Unbound", + &|domain| { + let w = build_query_vec(domain); + let t = Instant::now(); + let _ = rt.block_on(query_doh_server(&client, NUMA_DOH, &w, Some("numa.numa"))); + t.elapsed().as_secs_f64() * 1000.0 + }, + &|domain| { + let w = build_query_vec(domain); + let t = Instant::now(); + let _ = rt.block_on(query_doh_server(&client, UNBOUND_DOH, &w, None)); + t.elapsed().as_secs_f64() * 1000.0 + }, + iterations, + ); +} + +/// Hedging: single vs hedge-same vs hedge-dual vs Hickory. +/// This is the one mode that compares 4 contenders, not 2. +fn run_hedge_multi(rt: &tokio::runtime::Runtime, iterations: usize) { + let hedge_delay = Duration::from_millis(10); + let timeout = Duration::from_secs(10); + + println!("Hedging Benchmark × {iterations} iterations"); + println!("Upstream: {DOH_UPSTREAM}"); + println!("Hedge delay: {hedge_delay:?}"); + println!( + "{} domains × {ROUNDS} rounds per iteration\n", + DOMAINS.len() + ); + + let primary = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse"); + let primary_dual = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse"); + let secondary_dual = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse"); + let resolver = rt.block_on(build_hickory_resolver()); + + println!("Warming up..."); + for _ in 0..5 { + let w = build_query_vec("example.com"); + let _ = rt.block_on(numa::forward::forward_query_raw(&w, &primary, timeout)); + let _ = rt.block_on(numa::forward::forward_query_raw(&w, &primary_dual, timeout)); + let _ = rt.block_on(numa::forward::forward_query_raw( + &w, + &secondary_dual, + timeout, + )); + let _ = rt.block_on(query_hickory_doh(&resolver, "example.com")); + } + + let labels = ["Single", "Hedge-same", "Hedge-dual", "Hickory"]; + let mut all: [Vec; 4] = [vec![], vec![], vec![], vec![]]; + let mut iter_medians: Vec<[f64; 4]> = vec![]; + let mut iter_p99s: Vec<[f64; 4]> = vec![]; + + for iter in 1..=iterations { + println!(" iteration {iter}/{iterations}..."); + let mut samples: [Vec; 4] = [vec![], vec![], vec![], vec![]]; + + for domain in DOMAINS { + for _ in 0..ROUNDS { + let w = build_query_vec(domain); + + let t = Instant::now(); + let _ = rt.block_on(numa::forward::forward_query_raw(&w, &primary, timeout)); + samples[0].push(t.elapsed().as_secs_f64() * 1000.0); + + let t = Instant::now(); + let _ = rt.block_on(numa::forward::forward_with_hedging_raw( + &w, + &primary, + &primary, + hedge_delay, + timeout, + )); + samples[1].push(t.elapsed().as_secs_f64() * 1000.0); + + let t = Instant::now(); + let _ = rt.block_on(numa::forward::forward_with_hedging_raw( + &w, + &primary_dual, + &secondary_dual, + hedge_delay, + timeout, + )); + samples[2].push(t.elapsed().as_secs_f64() * 1000.0); + + let t = Instant::now(); + let _ = rt.block_on(query_hickory_doh(&resolver, domain)); + samples[3].push(t.elapsed().as_secs_f64() * 1000.0); } } - let numa_avg = mean(&numa_times); - let hickory_avg = mean(&hickory_times); - let numa_sd = stddev(&numa_times); - let hickory_sd = stddev(&hickory_times); - let delta = numa_avg - hickory_avg; + let s: Vec<_> = samples.iter_mut().map(|v| stats(v)).collect(); + iter_medians.push([s[0].1, s[1].1, s[2].1, s[3].1]); + iter_p99s.push([s[0].3, s[1].3, s[2].3, s[3].3]); + for (i, v) in samples.iter().enumerate() { + all[i].extend_from_slice(v); + } + } - numa_all.extend_from_slice(&numa_times); - hickory_all.extend_from_slice(&hickory_times); - per_domain.push((domain, numa_avg, hickory_avg, delta, numa_sd, hickory_sd)); - - let delta_str = format_delta(delta); + println!("\n=== Per-iteration medians ==="); + println!( + "{:<8} {:>10} {:>12} {:>12} {:>10}", + "iter", labels[0], labels[1], labels[2], labels[3] + ); + for (i, m) in iter_medians.iter().enumerate() { println!( - "{:<30} {:>7.1} ms {:>7.1} ms {:>7} ms {:>5.1}ms {:>5.1}ms", - domain, numa_avg, hickory_avg, delta_str, numa_sd, hickory_sd + "{:<8} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", + i + 1, + m[0], + m[1], + m[2], + m[3] ); } - println!("{}", "-".repeat(92)); - - let numa_mean = mean(&numa_all); - let hickory_mean = mean(&hickory_all); - let delta_mean = numa_mean - hickory_mean; - + println!("\n=== Per-iteration p99 ==="); println!( - "{:<30} {:>7.1} ms {:>7.1} ms {:>7} ms {:>5.1}ms {:>5.1}ms", - "OVERALL MEAN", - numa_mean, - hickory_mean, - format_delta(delta_mean), - stddev(&numa_all), - stddev(&hickory_all), + "{:<8} {:>10} {:>12} {:>12} {:>10}", + "iter", labels[0], labels[1], labels[2], labels[3] ); - - // Median - let numa_med = median(&mut numa_all); - let hickory_med = median(&mut hickory_all); - println!( - "{:<30} {:>7.1} ms {:>7.1} ms {:>7} ms", - "MEDIAN", - numa_med, - hickory_med, - format_delta(numa_med - hickory_med), - ); - - // P95 - let numa_p95 = percentile(&numa_all, 95.0); - let hickory_p95 = percentile(&hickory_all, 95.0); - println!( - "{:<30} {:>7.1} ms {:>7.1} ms {:>7} ms", - "P95", - numa_p95, - hickory_p95, - format_delta(numa_p95 - hickory_p95), - ); - - println!(); - let total_queries = DOMAINS.len() * ROUNDS; - if numa_mean < hickory_mean { - let pct = ((hickory_mean - numa_mean) / hickory_mean * 100.0).round(); - println!("Numa is ~{pct}% faster (mean over {total_queries} queries)."); - } else if hickory_mean < numa_mean { - let pct = ((numa_mean - hickory_mean) / numa_mean * 100.0).round(); - println!("Hickory is ~{pct}% faster (mean over {total_queries} queries)."); - } else { - println!("Both are equal (mean over {total_queries} queries)."); + for (i, p) in iter_p99s.iter().enumerate() { + println!( + "{:<8} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", + i + 1, + p[0], + p[1], + p[2], + p[3] + ); } - println!(); - println!("Methodology:"); - println!(" - Both forward to {DOH_UPSTREAM} over a reused TLS connection."); - println!(" - Numa cache flushed before each query. Hickory cache disabled."); - println!(" - Measurement order alternates each round to cancel order bias."); - println!(" - {} domains × {ROUNDS} rounds = {total_queries} queries per resolver.", DOMAINS.len()); + let s: Vec<_> = all + .iter_mut() + .map(|v| { + let (m, med, p95, p99, sd) = stats(v); + [m, med, p95, p99, sd] + }) + .collect(); + let total = iterations * DOMAINS.len() * ROUNDS; + println!("\n=== Aggregated ({total} samples per method) ===\n"); + println!( + "{:<10} {:>10} {:>12} {:>12} {:>10}", + "", labels[0], labels[1], labels[2], labels[3] + ); + for (row, idx) in [("mean", 0), ("median", 1), ("p95", 2), ("p99", 3), ("σ", 4)] { + println!( + "{:<10} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", + row, s[0][idx], s[1][idx], s[2][idx], s[3][idx] + ); + } + + let pct = |a: f64, b: f64| { + if b.abs() > 0.001 { + (a - b) / b * 100.0 + } else { + 0.0 + } + }; + println!( + "\nHedge-same vs Single: mean {:+.0}%, p95 {:+.0}%, p99 {:+.0}%", + pct(s[1][0], s[0][0]), + pct(s[1][2], s[0][2]), + pct(s[1][3], s[0][3]) + ); + println!( + "Hedge-same vs Hickory: mean {:+.0}%, p95 {:+.0}%, p99 {:+.0}%", + pct(s[1][0], s[3][0]), + pct(s[1][2], s[3][2]), + pct(s[1][3], s[3][3]) + ); } +// ── Diagnostics (small, kept for debugging) ───────────────────── + fn run_diag(rt: &tokio::runtime::Runtime) { - println!("Hickory connection reuse diagnostic"); - println!("20 sequential queries to {DOH_UPSTREAM} via one shared resolver"); - println!("If conn is reused: query 1 slow (TLS handshake), rest fast.\n"); + println!("Hickory connection reuse diagnostic\n20 queries to {DOH_UPSTREAM}\n"); let resolver = rt.block_on(build_hickory_resolver()); - let domains = [ - "example.com", "rust-lang.org", "kernel.org", "google.com", "github.com", - "example.com", "rust-lang.org", "kernel.org", "google.com", "github.com", - "example.com", "rust-lang.org", "kernel.org", "google.com", "github.com", - "example.com", "rust-lang.org", "kernel.org", "google.com", "github.com", + "example.com", + "rust-lang.org", + "kernel.org", + "google.com", + "github.com", + "example.com", + "rust-lang.org", + "kernel.org", + "google.com", + "github.com", + "example.com", + "rust-lang.org", + "kernel.org", + "google.com", + "github.com", + "example.com", + "rust-lang.org", + "kernel.org", + "google.com", + "github.com", ]; println!("{:>3} {:<20} {:>10}", "#", "Domain", "Time (ms)"); println!("{}", "-".repeat(40)); - for (i, domain) in domains.iter().enumerate() { use hickory_resolver::proto::rr::RecordType; let start = Instant::now(); @@ -347,143 +714,31 @@ fn run_diag(rt: &tokio::runtime::Runtime) { let ms = start.elapsed().as_secs_f64() * 1000.0; match &result { Ok(lookup) => { - let first = lookup.iter().next().map(|r| format!("{r}")).unwrap_or_default(); - println!("{:>3} {:<20} {:>7.1} ms OK {}", i + 1, domain, ms, first); - } - Err(e) => { - println!("{:>3} {:<20} {:>7.1} ms ERR {}", i + 1, domain, ms, e); + let first = lookup + .iter() + .next() + .map(|r| format!("{r}")) + .unwrap_or_default(); + println!( + "{:>3} {:<20} {:>7.1} ms OK {}", + i + 1, + domain, + ms, + first + ); } + Err(e) => println!("{:>3} {:<20} {:>7.1} ms ERR {}", i + 1, domain, ms, e), } } } -/// Library-to-library comparison: Numa's forward_query_raw vs Hickory's resolver.lookup(). -/// No UDP, no server pipeline — just the DoH forwarding call. -fn run_direct(rt: &tokio::runtime::Runtime) { - println!("Direct DoH Forwarding: Numa forward_query_raw vs Hickory resolver.lookup()"); - println!("Both forwarding to {DOH_UPSTREAM} — no UDP, no server pipeline"); - println!("{} domains × {ROUNDS} rounds", DOMAINS.len()); - println!(); - - // Build Numa's upstream (shared reqwest client, reuses HTTP/2 connection) - let numa_upstream = - numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse upstream"); - let timeout = Duration::from_secs(10); - - // Build Hickory's resolver (shared, reuses HTTP/2 connection) - let resolver = rt.block_on(build_hickory_resolver()); - - // Warm up both - println!("Warming up connections..."); - for _ in 0..3 { - let wire = build_query_vec("example.com"); - let _ = rt.block_on(numa::forward::forward_query_raw(&wire, &numa_upstream, timeout)); - let _ = rt.block_on(query_hickory_doh(&resolver, "example.com")); - } - - println!( - "{:<30} {:>10} {:>10} {:>10} {:>8} {:>8}", - "Domain", "Numa (ms)", "Hickory", "Delta", "σ Numa", "σ Hick" - ); - println!("{}", "-".repeat(92)); - - let mut numa_all = Vec::new(); - let mut hickory_all = Vec::new(); - - for domain in DOMAINS { - let mut numa_times = Vec::with_capacity(ROUNDS); - let mut hickory_times = Vec::with_capacity(ROUNDS); - - for round in 0..ROUNDS { - let wire = build_query_vec(domain); - - if round % 2 == 0 { - let w = wire.clone(); - let t = measure(rt, || { - rt.block_on(numa::forward::forward_query_raw(&w, &numa_upstream, timeout)) - }); - numa_times.push(t); - let t = measure(rt, || rt.block_on(query_hickory_doh(&resolver, domain))); - hickory_times.push(t); - } else { - let t = measure(rt, || rt.block_on(query_hickory_doh(&resolver, domain))); - hickory_times.push(t); - let w = wire.clone(); - let t = measure(rt, || { - rt.block_on(numa::forward::forward_query_raw(&w, &numa_upstream, timeout)) - }); - numa_times.push(t); - } - } - - let numa_avg = mean(&numa_times); - let hickory_avg = mean(&hickory_times); - let numa_sd = stddev(&numa_times); - let hickory_sd = stddev(&hickory_times); - let delta = numa_avg - hickory_avg; - - numa_all.extend_from_slice(&numa_times); - hickory_all.extend_from_slice(&hickory_times); - - println!( - "{:<30} {:>7.1} ms {:>7.1} ms {:>7} ms {:>5.1}ms {:>5.1}ms", - domain, numa_avg, hickory_avg, format_delta(delta), numa_sd, hickory_sd - ); - } - - println!("{}", "-".repeat(92)); - let numa_mean = mean(&numa_all); - let hickory_mean = mean(&hickory_all); - println!( - "{:<30} {:>7.1} ms {:>7.1} ms {:>7} ms {:>5.1}ms {:>5.1}ms", - "OVERALL MEAN", numa_mean, hickory_mean, format_delta(numa_mean - hickory_mean), - stddev(&numa_all), stddev(&hickory_all), - ); - let numa_med = median(&mut numa_all); - let hickory_med = median(&mut hickory_all); - println!( - "{:<30} {:>7.1} ms {:>7.1} ms {:>7} ms", - "MEDIAN", numa_med, hickory_med, format_delta(numa_med - hickory_med), - ); - let numa_p95 = percentile(&numa_all, 95.0); - let hickory_p95 = percentile(&hickory_all, 95.0); - println!( - "{:<30} {:>7.1} ms {:>7.1} ms {:>7} ms", - "P95", numa_p95, hickory_p95, format_delta(numa_p95 - hickory_p95), - ); - - println!(); - let total_queries = DOMAINS.len() * ROUNDS; - if numa_mean < hickory_mean { - let pct = ((hickory_mean - numa_mean) / hickory_mean * 100.0).round(); - println!("Numa is ~{pct}% faster (mean over {total_queries} queries)."); - } else if hickory_mean < numa_mean { - let pct = ((numa_mean - hickory_mean) / numa_mean * 100.0).round(); - println!("Hickory is ~{pct}% faster (mean over {total_queries} queries)."); - } else { - println!("Both are equal (mean over {total_queries} queries)."); - } - - println!(); - println!("Methodology:"); - println!(" - Both forward to {DOH_UPSTREAM} over a reused TLS/HTTP2 connection."); - println!(" - No UDP, no server pipeline, no cache — pure DoH forwarding."); - println!(" - Numa: forward_query_raw (reqwest). Hickory: resolver.lookup (h2)."); - println!(" - {} domains × {ROUNDS} rounds = {total_queries} queries per implementation.", DOMAINS.len()); -} - -/// Per-query timing diagnostic: 20 queries each through reqwest and Hickory. -/// Shows whether reqwest has connection reuse issues or per-request overhead. fn run_diag_clients(rt: &tokio::runtime::Runtime) { - println!("Client diagnostic: reqwest vs Hickory per-query timing"); - println!("20 queries each to {DOH_UPSTREAM}\n"); + println!("Client diagnostic: reqwest vs Hickory (20 queries to {DOH_UPSTREAM})\n"); - let upstream = - numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse upstream"); + let upstream = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse"); let resolver = rt.block_on(build_hickory_resolver()); let timeout = Duration::from_secs(10); - // Warm both for _ in 0..3 { let w = build_query_vec("example.com"); let _ = rt.block_on(numa::forward::forward_query_raw(&w, &upstream, timeout)); @@ -491,18 +746,35 @@ fn run_diag_clients(rt: &tokio::runtime::Runtime) { } let domains = [ - "example.com", "google.com", "github.com", "rust-lang.org", "cloudflare.com", - "example.com", "google.com", "github.com", "rust-lang.org", "cloudflare.com", - "example.com", "google.com", "github.com", "rust-lang.org", "cloudflare.com", - "example.com", "google.com", "github.com", "rust-lang.org", "cloudflare.com", + "example.com", + "google.com", + "github.com", + "rust-lang.org", + "cloudflare.com", + "example.com", + "google.com", + "github.com", + "rust-lang.org", + "cloudflare.com", + "example.com", + "google.com", + "github.com", + "rust-lang.org", + "cloudflare.com", + "example.com", + "google.com", + "github.com", + "rust-lang.org", + "cloudflare.com", ]; - println!("{:>3} {:<20} {:>12} {:>12}", "#", "Domain", "reqwest", "Hickory"); + println!( + "{:>3} {:<20} {:>12} {:>12}", + "#", "Domain", "reqwest", "Hickory" + ); println!("{}", "-".repeat(55)); - for (i, domain) in domains.iter().enumerate() { let wire = build_query_vec(domain); - let start = Instant::now(); let r_result = rt.block_on(numa::forward::forward_query_raw(&wire, &upstream, timeout)); let r_ms = start.elapsed().as_secs_f64() * 1000.0; @@ -515,1076 +787,104 @@ fn run_diag_clients(rt: &tokio::runtime::Runtime) { println!( "{:>3} {:<20} {:>7.1} ms {} {:>7.1} ms {}", - i + 1, domain, r_ms, r_ok, h_ms, h_ok - ); - } -} - -/// Spike trace: fire 200 sequential queries through reqwest and log every one -/// with a timestamp. Analyze the distribution and find spike clusters. -fn run_spike_trace(rt: &tokio::runtime::Runtime) { - println!("Spike trace: 200 sequential reqwest DoH queries"); - println!("Target: {DOH_UPSTREAM}\n"); - - let upstream = - numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse upstream"); - let timeout = Duration::from_secs(10); - - // Warm - for _ in 0..5 { - let w = build_query_vec("example.com"); - let _ = rt.block_on(numa::forward::forward_query_raw(&w, &upstream, timeout)); - } - - // Run the entire 200-query loop inside ONE block_on to eliminate - // per-query runtime re-entry overhead. - let samples: Vec<(u128, f64)> = rt.block_on(async { - let test_start = Instant::now(); - let mut s = Vec::with_capacity(200); - for i in 0..200 { - let domain = match i % 5 { - 0 => "example.com", - 1 => "google.com", - 2 => "github.com", - 3 => "rust-lang.org", - _ => "cloudflare.com", - }; - let wire = build_query_vec(domain); - let req_start = Instant::now(); - let t_from_start_us = test_start.elapsed().as_micros(); - let _ = numa::forward::forward_query_raw(&wire, &upstream, timeout).await; - let ms = req_start.elapsed().as_secs_f64() * 1000.0; - s.push((t_from_start_us, ms)); - } - s - }); - - // Compute stats - let mut sorted_times: Vec = samples.iter().map(|(_, t)| *t).collect(); - sorted_times.sort_by(|a, b| a.partial_cmp(b).unwrap()); - let n = sorted_times.len(); - let median = sorted_times[n / 2]; - let p90 = sorted_times[(n * 90) / 100]; - let p95 = sorted_times[(n * 95) / 100]; - let p99 = sorted_times[(n * 99) / 100]; - let max = sorted_times[n - 1]; - let mean: f64 = sorted_times.iter().sum::() / n as f64; - - println!("Distribution (n={}):", n); - println!(" mean: {:.1} ms", mean); - println!(" median: {:.1} ms", median); - println!(" p90: {:.1} ms", p90); - println!(" p95: {:.1} ms", p95); - println!(" p99: {:.1} ms", p99); - println!(" max: {:.1} ms", max); - println!(); - - // Define spike threshold as 3x median - let spike_threshold = median * 3.0; - let spikes: Vec<(usize, u128, f64)> = samples - .iter() - .enumerate() - .filter(|(_, (_, t))| *t > spike_threshold) - .map(|(i, (ts, t))| (i, *ts, *t)) - .collect(); - - println!("Spikes (> {:.1}ms, which is 3x median):", spike_threshold); - println!(" count: {}", spikes.len()); - if spikes.is_empty() { - return; - } - - // Inter-spike gaps (time between spikes) - let mut gaps_ms: Vec = Vec::new(); - for w in spikes.windows(2) { - let gap_us = w[1].1 - w[0].1; - gaps_ms.push(gap_us as f64 / 1000.0); - } - - println!(); - println!(" {:>4} {:>12} {:>10} {:>12}", "idx", "at (ms)", "latency", "gap from prev"); - for (i, ((idx, ts, latency), gap)) in spikes.iter().zip( - std::iter::once(&0.0).chain(gaps_ms.iter()) - ).enumerate() { - let _ = i; - let gap_str = if *gap > 0.0 { - format!("{:.0} ms", gap) - } else { - "-".to_string() - }; - println!(" {:>4} {:>9.1} {:>6.1} ms {:>12}", idx, *ts as f64 / 1000.0, latency, gap_str); - } - - if !gaps_ms.is_empty() { - let gap_mean: f64 = gaps_ms.iter().sum::() / gaps_ms.len() as f64; - let mut gap_sorted = gaps_ms.clone(); - gap_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap()); - let gap_median = gap_sorted[gap_sorted.len() / 2]; - println!(); - println!(" Inter-spike gap: mean={:.0}ms, median={:.0}ms", gap_mean, gap_median); - } -} - -/// Spike phases: time each step of the reqwest DoH call to find which phase -/// is slow during a spike. Reports (build+send, send->resp headers, body read). -fn run_spike_phases(rt: &tokio::runtime::Runtime) { - println!("Spike phases: timing each phase of reqwest DoH call"); - println!("Target: {DOH_UPSTREAM}\n"); - - // Build the same tuned client our forward_doh uses - let client = reqwest::Client::builder() - .use_rustls_tls() - .http2_initial_stream_window_size(65_535) - .http2_initial_connection_window_size(65_535) - .http2_keep_alive_interval(Duration::from_secs(15)) - .http2_keep_alive_while_idle(true) - .http2_keep_alive_timeout(Duration::from_secs(10)) - .pool_idle_timeout(Duration::from_secs(300)) - .pool_max_idle_per_host(1) - .build() - .unwrap(); - - // Warm up - for _ in 0..5 { - let wire = build_query_vec("example.com"); - let _ = rt.block_on(async { - client - .post(DOH_UPSTREAM) - .header("content-type", "application/dns-message") - .header("accept", "application/dns-message") - .body(wire) - .send() - .await - .ok()? - .bytes() - .await - .ok() - }); - } - - println!("{:>4} {:>8} {:>8} {:>8} {:>8}", "idx", "total", "build", "send", "body"); - println!("{}", "-".repeat(50)); - - let samples: Vec<(f64, f64, f64, f64)> = rt.block_on(async { - let mut s = Vec::with_capacity(200); - for i in 0..200 { - let domain = match i % 5 { - 0 => "example.com", - 1 => "google.com", - 2 => "github.com", - 3 => "rust-lang.org", - _ => "cloudflare.com", - }; - let wire = build_query_vec(domain); - - let t0 = Instant::now(); - // Phase 1: build the request - let req = client - .post(DOH_UPSTREAM) - .header("content-type", "application/dns-message") - .header("accept", "application/dns-message") - .body(wire); - let t1 = Instant::now(); - // Phase 2: send() — this is the dispatch channel + round trip to headers - let resp_result = req.send().await; - let t2 = Instant::now(); - // Phase 3: read body - let body_result = match resp_result { - Ok(r) => r.bytes().await.ok().map(|b| b.len()), - Err(_) => None, - }; - let t3 = Instant::now(); - - let build_ms = (t1 - t0).as_secs_f64() * 1000.0; - let send_ms = (t2 - t1).as_secs_f64() * 1000.0; - let body_ms = (t3 - t2).as_secs_f64() * 1000.0; - let total_ms = (t3 - t0).as_secs_f64() * 1000.0; - - s.push((total_ms, build_ms, send_ms, body_ms)); - let _ = body_result; - } - s - }); - - // Compute distribution on total - let mut totals: Vec = samples.iter().map(|s| s.0).collect(); - totals.sort_by(|a, b| a.partial_cmp(b).unwrap()); - let median = totals[100]; - - // Print spikes (> 3x median) with phase breakdown - for (i, (total, build, send, body)) in samples.iter().enumerate() { - if *total > median * 3.0 { - println!( - "{:>4} {:>5.1} ms {:>5.1} ms {:>5.1} ms {:>5.1} ms", - i, total, build, send, body - ); - } - } - - // Summary: mean of each phase for spikes vs non-spikes - let (spike_samples, normal_samples): (Vec<_>, Vec<_>) = samples - .iter() - .partition(|(t, _, _, _)| *t > median * 3.0); - - let phase_means = |samples: &[&(f64, f64, f64, f64)]| -> (f64, f64, f64, f64) { - let n = samples.len() as f64; - if n == 0.0 { return (0.0, 0.0, 0.0, 0.0); } - let total: f64 = samples.iter().map(|s| s.0).sum::() / n; - let build: f64 = samples.iter().map(|s| s.1).sum::() / n; - let send: f64 = samples.iter().map(|s| s.2).sum::() / n; - let body: f64 = samples.iter().map(|s| s.3).sum::() / n; - (total, build, send, body) - }; - - let spike_refs: Vec<&(f64, f64, f64, f64)> = spike_samples.iter().copied().collect(); - let normal_refs: Vec<&(f64, f64, f64, f64)> = normal_samples.iter().copied().collect(); - let (s_total, s_build, s_send, s_body) = phase_means(&spike_refs); - let (n_total, n_build, n_send, n_body) = phase_means(&normal_refs); - - println!(); - println!("Summary (mean ms):"); - println!( - " {:<8} {:>8} {:>8} {:>8} {:>8}", - "", "total", "build", "send", "body" - ); - println!( - " {:<8} {:>5.1} ms {:>5.1} ms {:>5.1} ms {:>5.1} ms (n={})", - "normal", n_total, n_build, n_send, n_body, normal_refs.len() - ); - println!( - " {:<8} {:>5.1} ms {:>5.1} ms {:>5.1} ms {:>5.1} ms (n={})", - "spike", s_total, s_build, s_send, s_body, spike_refs.len() - ); - println!(); - println!("Delta (spike - normal):"); - println!( - " build: {:+.1} ms, send: {:+.1} ms, body: {:+.1} ms", - s_build - n_build, - s_send - n_send, - s_body - n_body - ); -} - -/// Heartbeat probe: run a parallel task that ticks every 5ms and records -/// how long each tick actually takes. If the heartbeat stalls during a DoH -/// spike, it's a tokio scheduling issue (runtime can't poll tasks). If -/// heartbeat is fine while send() is stuck, it's internal to hyper/h2. -fn run_spike_heartbeat(rt: &tokio::runtime::Runtime) { - use std::sync::{Arc, Mutex}; - - println!("Spike heartbeat probe"); - println!("Running DoH queries + parallel 5ms heartbeat task\n"); - - let upstream = - numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse upstream"); - let timeout = Duration::from_secs(10); - - // Warm up - for _ in 0..5 { - let w = build_query_vec("example.com"); - let _ = rt.block_on(numa::forward::forward_query_raw(&w, &upstream, timeout)); - } - - // Shared vecs: (relative_ms_from_start, event_kind, latency_ms) - // event_kind: 0 = heartbeat, 1 = doh query - type EventLog = Vec<(f64, u8, f64)>; - let events: Arc> = Arc::new(Mutex::new(Vec::with_capacity(2000))); - let stop = Arc::new(std::sync::atomic::AtomicBool::new(false)); - - let test_start = Instant::now(); - - rt.block_on(async { - // Spawn heartbeat task - let hb_events = Arc::clone(&events); - let hb_stop = Arc::clone(&stop); - let hb_start = test_start; - let heartbeat = tokio::spawn(async move { - let mut next_tick = Instant::now(); - let target = Duration::from_millis(5); - while !hb_stop.load(std::sync::atomic::Ordering::Relaxed) { - next_tick += target; - // Sleep until the next scheduled tick - let now = Instant::now(); - if next_tick > now { - tokio::time::sleep(next_tick - now).await; - } - // Measure how much we overshot the scheduled tick - let actual = Instant::now(); - let lag_ms = if actual > next_tick { - (actual - next_tick).as_secs_f64() * 1000.0 - } else { - 0.0 - }; - let t = (actual - hb_start).as_secs_f64() * 1000.0; - if let Ok(mut e) = hb_events.lock() { - e.push((t, 0, lag_ms)); - } - } - }); - - // Run 200 DoH queries and record their timings - for i in 0..200 { - let domain = match i % 5 { - 0 => "example.com", - 1 => "google.com", - 2 => "github.com", - 3 => "rust-lang.org", - _ => "cloudflare.com", - }; - let wire = build_query_vec(domain); - let req_start = Instant::now(); - let _ = numa::forward::forward_query_raw(&wire, &upstream, timeout).await; - let elapsed = req_start.elapsed().as_secs_f64() * 1000.0; - let t = (req_start - test_start).as_secs_f64() * 1000.0; - if let Ok(mut e) = events.lock() { - e.push((t, 1, elapsed)); - } - } - - stop.store(true, std::sync::atomic::Ordering::Relaxed); - let _ = heartbeat.await; - }); - - let events = events.lock().unwrap(); - - // Separate heartbeats and doh events - let hb: Vec<(f64, f64)> = events - .iter() - .filter(|(_, k, _)| *k == 0) - .map(|(t, _, l)| (*t, *l)) - .collect(); - let doh: Vec<(f64, f64)> = events - .iter() - .filter(|(_, k, _)| *k == 1) - .map(|(t, _, l)| (*t, *l)) - .collect(); - - // Heartbeat stats - let mut hb_lags: Vec = hb.iter().map(|(_, l)| *l).collect(); - hb_lags.sort_by(|a, b| a.partial_cmp(b).unwrap()); - let hb_n = hb_lags.len(); - let hb_median = hb_lags[hb_n / 2]; - let hb_p95 = hb_lags[(hb_n * 95) / 100]; - let hb_p99 = hb_lags[(hb_n * 99) / 100]; - let hb_max = hb_lags[hb_n - 1]; - - // DoH stats - let mut doh_latencies: Vec = doh.iter().map(|(_, l)| *l).collect(); - doh_latencies.sort_by(|a, b| a.partial_cmp(b).unwrap()); - let doh_n = doh_latencies.len(); - let doh_median = doh_latencies[doh_n / 2]; - let doh_p95 = doh_latencies[(doh_n * 95) / 100]; - let doh_max = doh_latencies[doh_n - 1]; - - println!("Heartbeat lag (tick overshoot, {}ms target):", 5); - println!(" n: {}", hb_n); - println!(" median: {:.2} ms", hb_median); - println!(" p95: {:.2} ms", hb_p95); - println!(" p99: {:.2} ms", hb_p99); - println!(" max: {:.2} ms", hb_max); - println!(); - println!("DoH latency:"); - println!(" n: {}", doh_n); - println!(" median: {:.1} ms", doh_median); - println!(" p95: {:.1} ms", doh_p95); - println!(" max: {:.1} ms", doh_max); - println!(); - - // Find DoH spikes and check heartbeat activity DURING each spike - let doh_spike_threshold = doh_median * 3.0; - let mut spikes_with_hb_lag = 0; - let mut spikes_total = 0; - let mut max_hb_during_any_spike = 0.0_f64; - - println!( - "Correlation: during each DoH spike (>{:.1}ms), max heartbeat lag:", - doh_spike_threshold - ); - println!(" {:>6} {:>10} {:>18}", "doh_at", "doh_ms", "max_hb_lag_during"); - - for (doh_t, doh_ms) in &doh { - if *doh_ms > doh_spike_threshold { - spikes_total += 1; - // Find heartbeats that happened during this DoH query - let spike_start = *doh_t; - let spike_end = spike_start + *doh_ms; - let mut max_hb = 0.0_f64; - for (hb_t, hb_lag) in &hb { - if *hb_t >= spike_start && *hb_t <= spike_end + 20.0 { - if *hb_lag > max_hb { - max_hb = *hb_lag; - } - } - } - if max_hb > 5.0 { - spikes_with_hb_lag += 1; - } - max_hb_during_any_spike = max_hb_during_any_spike.max(max_hb); - println!( - " {:>5.0} ms {:>7.1} ms {:>14.2} ms", - doh_t, doh_ms, max_hb - ); - } - } - - println!(); - println!("Conclusion:"); - if spikes_total == 0 { - println!(" No DoH spikes in this run."); - } else { - let pct = (spikes_with_hb_lag as f64 / spikes_total as f64 * 100.0).round(); - println!( - " {}/{} spikes ({:.0}%) had concurrent heartbeat lag >5ms.", - spikes_with_hb_lag, spikes_total, pct - ); - println!(" Max heartbeat lag during any spike: {:.2}ms", max_hb_during_any_spike); - println!(); - if max_hb_during_any_spike > 20.0 { - println!(" → Heartbeat stalls during DoH spikes: tokio scheduling / OS thread issue."); - println!(" The runtime can't poll ANY task — likely QoS demotion, GC pause,"); - println!(" or the worker thread is blocked somewhere."); - } else { - println!(" → Heartbeat runs normally during DoH spikes: internal to hyper/h2."); - println!(" The runtime is fine, but send()'s await is stuck waiting for"); - println!(" the ClientTask to poll the dispatch channel."); - } - } -} - -/// Hedging benchmark: tests four configurations against Hickory. -/// Single: 1 client → Quad9 (baseline) -/// Hedge-same: hedge against same client/connection → Quad9 -/// Hedge-dual: hedge against 2 separate clients, both → Quad9 (same upstream, 2 HTTP/2 conns) -/// Hickory: Hickory resolver → Quad9 (reference) -fn run_hedge(rt: &tokio::runtime::Runtime) { - let hedge_delay = Duration::from_millis(10); - - println!("Hedging Benchmark (all paths → Quad9 only)"); - println!("Upstream: {}", DOH_UPSTREAM); - println!("Hedge delay: {:?}", hedge_delay); - println!("{} domains × {} rounds\n", DOMAINS.len(), ROUNDS); - - // Primary and secondary: two separate reqwest clients → same Quad9 URL. - // This gives two independent HTTP/2 connections, so dispatch spikes - // are uncorrelated (at most one stalls at a time). - let primary_same = - numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse primary"); - let primary_dual = - numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse primary_dual"); - let secondary_dual = - numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse secondary_dual"); - let timeout = Duration::from_secs(10); - - let resolver = rt.block_on(build_hickory_resolver()); - - // Warm up all paths (separate connections need their own TLS handshake) - println!("Warming up connections..."); - for _ in 0..5 { - let w = build_query_vec("example.com"); - let _ = rt.block_on(numa::forward::forward_query_raw(&w, &primary_same, timeout)); - let _ = rt.block_on(numa::forward::forward_query_raw(&w, &primary_dual, timeout)); - let _ = rt.block_on(numa::forward::forward_query_raw(&w, &secondary_dual, timeout)); - let _ = rt.block_on(query_hickory_doh(&resolver, "example.com")); - } - - let mut single_all = Vec::new(); - let mut hedge_same_all = Vec::new(); - let mut hedge_dual_all = Vec::new(); - let mut hickory_all = Vec::new(); - - println!( - "{:<24} {:>10} {:>10} {:>10} {:>10}", - "Domain", "Single", "Hedge-same", "Hedge-dual", "Hickory" - ); - println!("{}", "-".repeat(78)); - - for domain in DOMAINS { - let mut single_times = Vec::with_capacity(ROUNDS); - let mut hedge_same_times = Vec::with_capacity(ROUNDS); - let mut hedge_dual_times = Vec::with_capacity(ROUNDS); - let mut hickory_times = Vec::with_capacity(ROUNDS); - - for _ in 0..ROUNDS { - let wire = build_query_vec(domain); - - let t = Instant::now(); - let _ = rt.block_on(numa::forward::forward_query_raw(&wire, &primary_same, timeout)); - single_times.push(t.elapsed().as_secs_f64() * 1000.0); - - let t = Instant::now(); - let _ = rt.block_on(numa::forward::forward_with_hedging_raw( - &wire, &primary_same, &primary_same, hedge_delay, timeout, - )); - hedge_same_times.push(t.elapsed().as_secs_f64() * 1000.0); - - let t = Instant::now(); - let _ = rt.block_on(numa::forward::forward_with_hedging_raw( - &wire, &primary_dual, &secondary_dual, hedge_delay, timeout, - )); - hedge_dual_times.push(t.elapsed().as_secs_f64() * 1000.0); - - let t = Instant::now(); - let _ = rt.block_on(query_hickory_doh(&resolver, domain)); - hickory_times.push(t.elapsed().as_secs_f64() * 1000.0); - } - - single_all.extend_from_slice(&single_times); - hedge_same_all.extend_from_slice(&hedge_same_times); - hedge_dual_all.extend_from_slice(&hedge_dual_times); - hickory_all.extend_from_slice(&hickory_times); - - println!( - "{:<24} {:>7.1} ms {:>7.1} ms {:>7.1} ms {:>7.1} ms", + i + 1, domain, - mean(&single_times), - mean(&hedge_same_times), - mean(&hedge_dual_times), - mean(&hickory_times) + r_ms, + r_ok, + h_ms, + h_ok ); } - - println!("{}", "-".repeat(78)); - - let stats = |all: &mut Vec| -> (f64, f64, f64, f64, f64) { - let m = mean(all); - let med = median(all); - let p95 = percentile(all, 95.0); - let p99 = percentile(all, 99.0); - let sd = stddev(all); - (m, med, p95, p99, sd) - }; - - let (s_m, s_med, s_p95, s_p99, s_sd) = stats(&mut single_all); - let (hs_m, hs_med, hs_p95, hs_p99, hs_sd) = stats(&mut hedge_same_all); - let (hd_m, hd_med, hd_p95, hd_p99, hd_sd) = stats(&mut hedge_dual_all); - let (k_m, k_med, k_p95, k_p99, k_sd) = stats(&mut hickory_all); - - println!(); - println!( - "{:<10} {:>10} {:>10} {:>10} {:>10}", - "", "Single", "Hedge-same", "Hedge-dual", "Hickory" - ); - println!( - "{:<10} {:>7.1} ms {:>7.1} ms {:>7.1} ms {:>7.1} ms", - "mean", s_m, hs_m, hd_m, k_m - ); - println!( - "{:<10} {:>7.1} ms {:>7.1} ms {:>7.1} ms {:>7.1} ms", - "median", s_med, hs_med, hd_med, k_med - ); - println!( - "{:<10} {:>7.1} ms {:>7.1} ms {:>7.1} ms {:>7.1} ms", - "p95", s_p95, hs_p95, hd_p95, k_p95 - ); - println!( - "{:<10} {:>7.1} ms {:>7.1} ms {:>7.1} ms {:>7.1} ms", - "p99", s_p99, hs_p99, hd_p99, k_p99 - ); - println!( - "{:<10} {:>7.1} ms {:>7.1} ms {:>7.1} ms {:>7.1} ms", - "σ", s_sd, hs_sd, hd_sd, k_sd - ); - - println!(); - println!("Hedge-same improvement over single:"); - println!(" mean: {:+.0}%, p95: {:+.0}%, p99: {:+.0}%", - (hs_m - s_m) / s_m * 100.0, - (hs_p95 - s_p95) / s_p95 * 100.0, - (hs_p99 - s_p99) / s_p99 * 100.0); - println!("Hedge-dual improvement over single:"); - println!(" mean: {:+.0}%, p95: {:+.0}%, p99: {:+.0}%", - (hd_m - s_m) / s_m * 100.0, - (hd_p95 - s_p95) / s_p95 * 100.0, - (hd_p99 - s_p99) / s_p99 * 100.0); } -/// Run the hedging benchmark N times and aggregate samples across all runs. -/// Also reports per-run stats to show drift. -fn run_hedge_multi(rt: &tokio::runtime::Runtime, iterations: usize) { - let hedge_delay = Duration::from_millis(10); +// ── Stats helpers ─────────────────────────────────────────────── - println!("Hedging Benchmark × {} iterations", iterations); - println!("Upstream: {}", DOH_UPSTREAM); - println!("Hedge delay: {:?}", hedge_delay); - println!("{} domains × {} rounds per iteration\n", DOMAINS.len(), ROUNDS); - - let primary_same = - numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse"); - let primary_dual = - numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse"); - let secondary_dual = - numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse"); - let timeout = Duration::from_secs(10); - - let resolver = rt.block_on(build_hickory_resolver()); - - // Warm up - println!("Warming up..."); - for _ in 0..5 { - let w = build_query_vec("example.com"); - let _ = rt.block_on(numa::forward::forward_query_raw(&w, &primary_same, timeout)); - let _ = rt.block_on(numa::forward::forward_query_raw(&w, &primary_dual, timeout)); - let _ = rt.block_on(numa::forward::forward_query_raw(&w, &secondary_dual, timeout)); - let _ = rt.block_on(query_hickory_doh(&resolver, "example.com")); +fn stats(v: &mut [f64]) -> (f64, f64, f64, f64, f64) { + if v.is_empty() { + return (0.0, 0.0, 0.0, 0.0, 0.0); } - - // Accumulated samples across all iterations - let mut all_single = Vec::new(); - let mut all_hedge_same = Vec::new(); - let mut all_hedge_dual = Vec::new(); - let mut all_hickory = Vec::new(); - - // Per-iteration summary stats - let mut iter_stats: Vec<[(f64, f64, f64, f64, f64); 4]> = Vec::new(); - - for iter in 1..=iterations { - println!(" iteration {}/{}...", iter, iterations); - - let mut single = Vec::new(); - let mut hedge_same = Vec::new(); - let mut hedge_dual = Vec::new(); - let mut hickory = Vec::new(); - - for domain in DOMAINS { - for _ in 0..ROUNDS { - let wire = build_query_vec(domain); - - let t = Instant::now(); - let _ = rt.block_on(numa::forward::forward_query_raw(&wire, &primary_same, timeout)); - single.push(t.elapsed().as_secs_f64() * 1000.0); - - let t = Instant::now(); - let _ = rt.block_on(numa::forward::forward_with_hedging_raw( - &wire, &primary_same, &primary_same, hedge_delay, timeout, - )); - hedge_same.push(t.elapsed().as_secs_f64() * 1000.0); - - let t = Instant::now(); - let _ = rt.block_on(numa::forward::forward_with_hedging_raw( - &wire, &primary_dual, &secondary_dual, hedge_delay, timeout, - )); - hedge_dual.push(t.elapsed().as_secs_f64() * 1000.0); - - let t = Instant::now(); - let _ = rt.block_on(query_hickory_doh(&resolver, domain)); - hickory.push(t.elapsed().as_secs_f64() * 1000.0); - } - } - - let stats = |v: &mut Vec| -> (f64, f64, f64, f64, f64) { - (mean(v), median(v), percentile(v, 95.0), percentile(v, 99.0), stddev(v)) - }; - iter_stats.push([ - stats(&mut single), - stats(&mut hedge_same), - stats(&mut hedge_dual), - stats(&mut hickory), - ]); - - all_single.extend_from_slice(&single); - all_hedge_same.extend_from_slice(&hedge_same); - all_hedge_dual.extend_from_slice(&hedge_dual); - all_hickory.extend_from_slice(&hickory); - } - - println!(); - println!("=== Per-iteration medians (drift check) ==="); - println!( - "{:<8} {:>10} {:>12} {:>12} {:>10}", - "iter", "Single", "Hedge-same", "Hedge-dual", "Hickory" - ); - for (i, s) in iter_stats.iter().enumerate() { - println!( - "{:<8} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", - i + 1, - s[0].1, - s[1].1, - s[2].1, - s[3].1 - ); - } - - println!(); - println!("=== Per-iteration p99 (drift check) ==="); - println!( - "{:<8} {:>10} {:>12} {:>12} {:>10}", - "iter", "Single", "Hedge-same", "Hedge-dual", "Hickory" - ); - for (i, s) in iter_stats.iter().enumerate() { - println!( - "{:<8} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", - i + 1, - s[0].3, - s[1].3, - s[2].3, - s[3].3 - ); - } - - let final_stats = |v: &mut Vec| -> (f64, f64, f64, f64, f64) { - (mean(v), median(v), percentile(v, 95.0), percentile(v, 99.0), stddev(v)) - }; - let (s_m, s_med, s_p95, s_p99, s_sd) = final_stats(&mut all_single); - let (hs_m, hs_med, hs_p95, hs_p99, hs_sd) = final_stats(&mut all_hedge_same); - let (hd_m, hd_med, hd_p95, hd_p99, hd_sd) = final_stats(&mut all_hedge_dual); - let (k_m, k_med, k_p95, k_p99, k_sd) = final_stats(&mut all_hickory); - - println!(); - let total = iterations * DOMAINS.len() * ROUNDS; - println!("=== Aggregated across all {} samples per method ===", total); - println!(); - println!( - "{:<10} {:>10} {:>12} {:>12} {:>10}", - "", "Single", "Hedge-same", "Hedge-dual", "Hickory" - ); - println!( - "{:<10} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", - "mean", s_m, hs_m, hd_m, k_m - ); - println!( - "{:<10} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", - "median", s_med, hs_med, hd_med, k_med - ); - println!( - "{:<10} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", - "p95", s_p95, hs_p95, hd_p95, k_p95 - ); - println!( - "{:<10} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", - "p99", s_p99, hs_p99, hd_p99, k_p99 - ); - println!( - "{:<10} {:>7.1} ms {:>9.1} ms {:>9.1} ms {:>7.1} ms", - "σ", s_sd, hs_sd, hd_sd, k_sd - ); - - println!(); - println!("Hedge-same vs Single: mean {:+.0}%, p95 {:+.0}%, p99 {:+.0}%", - (hs_m - s_m) / s_m * 100.0, - (hs_p95 - s_p95) / s_p95 * 100.0, - (hs_p99 - s_p99) / s_p99 * 100.0); - println!("Hedge-dual vs Single: mean {:+.0}%, p95 {:+.0}%, p99 {:+.0}%", - (hd_m - s_m) / s_m * 100.0, - (hd_p95 - s_p95) / s_p95 * 100.0, - (hd_p99 - s_p99) / s_p99 * 100.0); - println!("Hedge-same vs Hickory: mean {:+.0}%, p95 {:+.0}%, p99 {:+.0}%", - (hs_m - k_m) / k_m * 100.0, - (hs_p95 - k_p95) / k_p95 * 100.0, - (hs_p99 - k_p99) / k_p99 * 100.0); -} - -/// Server-to-server benchmark: Numa vs dnscrypt-proxy vs Unbound. -/// All are full servers: UDP in, encrypted forwarding to Quad9. -/// Numa + dnscrypt: DoH (HTTPS). Unbound: DoT (TLS port 853). -fn run_vs_dnscrypt(rt: &tokio::runtime::Runtime, iterations: usize) { - const DNSCRYPT_ADDR: &str = "127.0.0.1:5455"; - const UNBOUND_ADDR: &str = "127.0.0.1:5456"; - let numa_addr: SocketAddr = NUMA_BENCH.parse().unwrap(); - let dnscrypt_addr: SocketAddr = DNSCRYPT_ADDR.parse().unwrap(); - let unbound_addr: SocketAddr = UNBOUND_ADDR.parse().unwrap(); - - println!("Server-to-Server: Numa vs dnscrypt-proxy vs Unbound"); - println!("Numa (DoH): {}", NUMA_BENCH); - println!("dnscrypt-proxy (DoH): {}", DNSCRYPT_ADDR); - println!("Unbound (DoT): {}", UNBOUND_ADDR); - println!("All forwarding to Quad9 over encrypted transport"); - println!("{} domains × {} rounds × {} iterations\n", - DOMAINS.len(), ROUNDS, iterations); - - // Verify all are up - let servers: Vec<(&str, SocketAddr)> = vec![ - ("Numa", numa_addr), - ("dnscrypt-proxy", dnscrypt_addr), - ("Unbound", unbound_addr), - ]; - for (name, addr) in &servers { - if rt.block_on(query_udp(*addr, "example.com")).is_none() { - eprintln!("{} not responding on {}", name, addr); - std::process::exit(1); - } - } - println!("All servers reachable.\n"); - - // Warm up - println!("Warming up..."); - for _ in 0..5 { - for (_, addr) in &servers { - let _ = rt.block_on(query_udp(*addr, "example.com")); - } - } - - let mut all_numa = Vec::new(); - let mut all_dnscrypt = Vec::new(); - let mut all_unbound = Vec::new(); - let mut iter_stats: Vec<[(f64, f64, f64, f64, f64); 3]> = Vec::new(); - - for iter in 1..=iterations { - println!(" iteration {}/{}...", iter, iterations); - - let mut numa = Vec::new(); - let mut dnscrypt = Vec::new(); - let mut unbound = Vec::new(); - - for domain in DOMAINS { - for round in 0..ROUNDS { - flush_cache(); - std::thread::sleep(Duration::from_millis(5)); - - // Rotate order: 3 servers, 3 possible orderings - let order = round % 3; - let mut measure = |addr: SocketAddr| -> f64 { - let t = Instant::now(); - let _ = rt.block_on(query_udp(addr, domain)); - t.elapsed().as_secs_f64() * 1000.0 - }; - - match order { - 0 => { - numa.push(measure(numa_addr)); - dnscrypt.push(measure(dnscrypt_addr)); - unbound.push(measure(unbound_addr)); - } - 1 => { - dnscrypt.push(measure(dnscrypt_addr)); - unbound.push(measure(unbound_addr)); - numa.push(measure(numa_addr)); - } - _ => { - unbound.push(measure(unbound_addr)); - numa.push(measure(numa_addr)); - dnscrypt.push(measure(dnscrypt_addr)); - } - } - } - } - - let stats = |v: &mut Vec| -> (f64, f64, f64, f64, f64) { - (mean(v), median(v), percentile(v, 95.0), percentile(v, 99.0), stddev(v)) - }; - iter_stats.push([stats(&mut numa), stats(&mut dnscrypt), stats(&mut unbound)]); - - all_numa.extend_from_slice(&numa); - all_dnscrypt.extend_from_slice(&dnscrypt); - all_unbound.extend_from_slice(&unbound); - } - - println!(); - println!("=== Per-iteration medians ==="); - println!("{:<8} {:>10} {:>14} {:>10}", "iter", "Numa", "dnscrypt-proxy", "Unbound"); - for (i, s) in iter_stats.iter().enumerate() { - println!("{:<8} {:>7.1} ms {:>11.1} ms {:>7.1} ms", - i + 1, s[0].1, s[1].1, s[2].1); - } - - println!(); - println!("=== Per-iteration p99 ==="); - println!("{:<8} {:>10} {:>14} {:>10}", "iter", "Numa", "dnscrypt-proxy", "Unbound"); - for (i, s) in iter_stats.iter().enumerate() { - println!("{:<8} {:>7.1} ms {:>11.1} ms {:>7.1} ms", - i + 1, s[0].3, s[1].3, s[2].3); - } - - let stats = |v: &mut Vec| -> (f64, f64, f64, f64, f64) { - (mean(v), median(v), percentile(v, 95.0), percentile(v, 99.0), stddev(v)) - }; - let (n_m, n_med, n_p95, n_p99, n_sd) = stats(&mut all_numa); - let (d_m, d_med, d_p95, d_p99, d_sd) = stats(&mut all_dnscrypt); - let (u_m, u_med, u_p95, u_p99, u_sd) = stats(&mut all_unbound); - - println!(); - let total = iterations * DOMAINS.len() * ROUNDS; - println!("=== Aggregated ({} samples per method) ===", total); - println!(); - println!("{:<10} {:>10} {:>14} {:>10}", "", "Numa", "dnscrypt-proxy", "Unbound"); - println!("{:<10} {:>7.1} ms {:>11.1} ms {:>7.1} ms", "mean", n_m, d_m, u_m); - println!("{:<10} {:>7.1} ms {:>11.1} ms {:>7.1} ms", "median", n_med, d_med, u_med); - println!("{:<10} {:>7.1} ms {:>11.1} ms {:>7.1} ms", "p95", n_p95, d_p95, u_p95); - println!("{:<10} {:>7.1} ms {:>11.1} ms {:>7.1} ms", "p99", n_p99, d_p99, u_p99); - println!("{:<10} {:>7.1} ms {:>11.1} ms {:>7.1} ms", "σ", n_sd, d_sd, u_sd); - println!(); - - println!("Numa vs dnscrypt-proxy:"); - println!(" mean: {:+.0}%, median: {:+.0}%, p99: {:+.0}%", - (n_m - d_m) / d_m * 100.0, (n_med - d_med) / d_med * 100.0, (n_p99 - d_p99) / d_p99 * 100.0); - println!("Numa vs Unbound:"); - println!(" mean: {:+.0}%, median: {:+.0}%, p99: {:+.0}%", - (n_m - u_m) / u_m * 100.0, (n_med - u_med) / u_med * 100.0, (n_p99 - u_p99) / u_p99 * 100.0); -} - -/// Numa vs Unbound: both forward over plain UDP to Quad9, caching enabled. -/// Truly equal transport — no TLS, no HTTP/2, pure forwarding + cache. -fn run_vs_unbound(rt: &tokio::runtime::Runtime, iterations: usize) { - const UNBOUND_ADDR: &str = "127.0.0.1:5456"; - let numa_addr: SocketAddr = NUMA_BENCH.parse().unwrap(); - let unbound_addr: SocketAddr = UNBOUND_ADDR.parse().unwrap(); - - println!("Numa vs Unbound (both plain UDP forwarding to Quad9, caching enabled)"); - println!("Numa: {} → 9.9.9.9:53 UDP", NUMA_BENCH); - println!("Unbound: {} → 9.9.9.9:53 UDP", UNBOUND_ADDR); - println!("{} domains × {} rounds × {} iterations\n", - DOMAINS.len(), ROUNDS, iterations); - - if rt.block_on(query_udp(numa_addr, "example.com")).is_none() { - eprintln!("Numa not responding"); std::process::exit(1); - } - if rt.block_on(query_udp(unbound_addr, "example.com")).is_none() { - eprintln!("Unbound not responding"); std::process::exit(1); - } - println!("Both servers reachable.\n"); - - println!("Warming up..."); - for _ in 0..5 { - let _ = rt.block_on(query_udp(numa_addr, "example.com")); - let _ = rt.block_on(query_udp(unbound_addr, "example.com")); - } - - let mut all_numa = Vec::new(); - let mut all_unbound = Vec::new(); - let mut iter_stats: Vec<[(f64, f64, f64, f64, f64); 2]> = Vec::new(); - - for iter in 1..=iterations { - println!(" iteration {}/{}...", iter, iterations); - - let mut numa = Vec::new(); - let mut unbound = Vec::new(); - - for domain in DOMAINS { - for round in 0..ROUNDS { - // No cache flushing — both serve from cache after first hit - let mut measure = |addr: SocketAddr| -> f64 { - let t = Instant::now(); - let _ = rt.block_on(query_udp(addr, domain)); - t.elapsed().as_secs_f64() * 1000.0 - }; - - if round % 2 == 0 { - numa.push(measure(numa_addr)); - unbound.push(measure(unbound_addr)); - } else { - unbound.push(measure(unbound_addr)); - numa.push(measure(numa_addr)); - } - } - } - - let stats = |v: &mut Vec| -> (f64, f64, f64, f64, f64) { - (mean(v), median(v), percentile(v, 95.0), percentile(v, 99.0), stddev(v)) - }; - iter_stats.push([stats(&mut numa), stats(&mut unbound)]); - - all_numa.extend_from_slice(&numa); - all_unbound.extend_from_slice(&unbound); - } - - println!(); - println!("=== Per-iteration medians ==="); - println!("{:<8} {:>10} {:>10}", "iter", "Numa", "Unbound"); - for (i, s) in iter_stats.iter().enumerate() { - println!("{:<8} {:>7.1} ms {:>7.1} ms", i + 1, s[0].1, s[1].1); - } - - println!(); - println!("=== Per-iteration p99 ==="); - println!("{:<8} {:>10} {:>10}", "iter", "Numa", "Unbound"); - for (i, s) in iter_stats.iter().enumerate() { - println!("{:<8} {:>7.1} ms {:>7.1} ms", i + 1, s[0].3, s[1].3); - } - - let stats = |v: &mut Vec| -> (f64, f64, f64, f64, f64) { - (mean(v), median(v), percentile(v, 95.0), percentile(v, 99.0), stddev(v)) - }; - let (n_m, n_med, n_p95, n_p99, n_sd) = stats(&mut all_numa); - let (u_m, u_med, u_p95, u_p99, u_sd) = stats(&mut all_unbound); - - println!(); - let total = iterations * DOMAINS.len() * ROUNDS; - println!("=== Aggregated ({} samples per method) ===", total); - println!(); - println!("{:<10} {:>10} {:>10}", "", "Numa", "Unbound"); - println!("{:<10} {:>7.1} ms {:>7.1} ms", "mean", n_m, u_m); - println!("{:<10} {:>7.1} ms {:>7.1} ms", "median", n_med, u_med); - println!("{:<10} {:>7.1} ms {:>7.1} ms", "p95", n_p95, u_p95); - println!("{:<10} {:>7.1} ms {:>7.1} ms", "p99", n_p99, u_p99); - println!("{:<10} {:>7.1} ms {:>7.1} ms", "σ", n_sd, u_sd); - println!(); - - println!("Numa vs Unbound:"); - println!(" mean: {:+.1} ms ({:+.0}%)", n_m - u_m, (n_m - u_m) / u_m * 100.0); - println!(" median: {:+.1} ms ({:+.0}%)", n_med - u_med, (n_med - u_med) / u_med * 100.0); - println!(" p95: {:+.1} ms ({:+.0}%)", n_p95 - u_p95, (n_p95 - u_p95) / u_p95 * 100.0); - println!(" p99: {:+.1} ms ({:+.0}%)", n_p99 - u_p99, (n_p99 - u_p99) / u_p99 * 100.0); -} - -/// Build a DNS query as a Vec for use with forward_query_raw. -fn build_query_vec(domain: &str) -> Vec { - let mut buf = vec![0u8; 512]; - let len = build_query(&mut buf, domain); - buf.truncate(len); - buf -} - -fn measure R, R>(_rt: &tokio::runtime::Runtime, f: F) -> f64 { - let start = Instant::now(); - f(); - start.elapsed().as_secs_f64() * 1000.0 -} - -fn mean(v: &[f64]) -> f64 { - v.iter().sum::() / v.len() as f64 -} - -fn stddev(v: &[f64]) -> f64 { - let m = mean(v); - let var = v.iter().map(|x| (x - m).powi(2)).sum::() / v.len() as f64; - var.sqrt() -} - -fn median(v: &mut [f64]) -> f64 { + let mean = v.iter().sum::() / v.len() as f64; v.sort_by(|a, b| a.partial_cmp(b).unwrap()); let n = v.len(); - if n % 2 == 0 { + let median = if n % 2 == 0 { (v[n / 2 - 1] + v[n / 2]) / 2.0 } else { v[n / 2] - } + }; + let p95 = v[((n as f64 * 0.95).round() as usize).min(n - 1)]; + let p99 = v[((n as f64 * 0.99).round() as usize).min(n - 1)]; + let var = v.iter().map(|x| (x - mean).powi(2)).sum::() / n as f64; + (mean, median, p95, p99, var.sqrt()) } -fn percentile(sorted: &[f64], p: f64) -> f64 { - let idx = (p / 100.0 * (sorted.len() - 1) as f64).round() as usize; - sorted[idx.min(sorted.len() - 1)] -} +// ── Query helpers ─────────────────────────────────────────────── -fn format_delta(delta: f64) -> String { - if delta > 0.0 { - format!("+{:.1}", delta) - } else { - format!("{:.1}", delta) - } -} - -/// Query a DNS server over UDP. async fn query_udp(addr: SocketAddr, domain: &str) -> Option<()> { use tokio::net::UdpSocket; - let sock = UdpSocket::bind("0.0.0.0:0").await.ok()?; let mut buf = vec![0u8; 512]; let len = build_query(&mut buf, domain); - sock.send_to(&buf[..len], addr).await.ok()?; - let mut resp = vec![0u8; 4096]; tokio::time::timeout(Duration::from_secs(10), sock.recv_from(&mut resp)) .await .ok()? .ok()?; - Some(()) } -/// Build a shared Hickory DoH resolver (reuses TLS connection across queries). +async fn query_dot_once( + addr: &str, + domain: &str, + tls_config: &std::sync::Arc, +) -> Result<(), Box> { + use rustls::pki_types::ServerName; + use tokio::io::{AsyncReadExt, AsyncWriteExt}; + use tokio::net::TcpStream; + use tokio_rustls::TlsConnector; + + let connector = TlsConnector::from(tls_config.clone()); + let stream = TcpStream::connect(addr).await?; + let server_name = ServerName::try_from("localhost")?; + let mut tls = connector.connect(server_name, stream).await?; + + let mut buf = vec![0u8; 512]; + let len = build_query(&mut buf, domain); + let msg = &buf[..len]; + + let mut out = Vec::with_capacity(2 + msg.len()); + out.extend_from_slice(&(msg.len() as u16).to_be_bytes()); + out.extend_from_slice(msg); + tls.write_all(&out).await?; + + let mut len_buf = [0u8; 2]; + tls.read_exact(&mut len_buf).await?; + let resp_len = u16::from_be_bytes(len_buf) as usize; + let mut resp = vec![0u8; resp_len]; + tls.read_exact(&mut resp).await?; + Ok(()) +} + +async fn query_doh_server( + client: &reqwest::Client, + url: &str, + wire: &[u8], + host: Option<&str>, +) -> Result, Box> { + let mut req = client + .post(url) + .header("content-type", "application/dns-message") + .header("accept", "application/dns-message") + .body(wire.to_vec()); + if let Some(h) = host { + req = req.header("host", h); + } + let resp = req.send().await?.error_for_status()?; + Ok(resp.bytes().await?.to_vec()) +} + async fn build_hickory_resolver() -> hickory_resolver::TokioResolver { use hickory_resolver::config::*; - let ns = NameServerConfig { socket_addr: "9.9.9.9:443".parse().unwrap(), protocol: hickory_proto::xfer::Protocol::Https, @@ -1593,29 +893,79 @@ async fn build_hickory_resolver() -> hickory_resolver::TokioResolver { bind_addr: None, http_endpoint: Some("/dns-query".to_string()), }; - let config = ResolverConfig::from_parts(None, vec![], NameServerConfigGroup::from(vec![ns])); - let mut opts = ResolverOpts::default(); opts.cache_size = 0; opts.num_concurrent_reqs = 1; opts.timeout = Duration::from_secs(10); - hickory_resolver::TokioResolver::builder_with_config(config, Default::default()) .with_options(opts) .build() } -/// Query using the shared Hickory resolver. -async fn query_hickory_doh( - resolver: &hickory_resolver::TokioResolver, - domain: &str, -) -> Option<()> { +async fn query_hickory_doh(resolver: &hickory_resolver::TokioResolver, domain: &str) -> Option<()> { use hickory_resolver::proto::rr::RecordType; let _ = resolver.lookup(domain, RecordType::A).await.ok()?; Some(()) } +fn build_insecure_tls_config() -> std::sync::Arc { + use rustls::client::danger::{HandshakeSignatureValid, ServerCertVerified, ServerCertVerifier}; + use rustls::pki_types::{CertificateDer, ServerName, UnixTime}; + use rustls::DigitallySignedStruct; + + #[derive(Debug)] + struct NoVerify; + impl ServerCertVerifier for NoVerify { + fn verify_server_cert( + &self, + _: &CertificateDer<'_>, + _: &[CertificateDer<'_>], + _: &ServerName<'_>, + _: &[u8], + _: UnixTime, + ) -> Result { + Ok(ServerCertVerified::assertion()) + } + fn verify_tls12_signature( + &self, + _: &[u8], + _: &CertificateDer<'_>, + _: &DigitallySignedStruct, + ) -> Result { + Ok(HandshakeSignatureValid::assertion()) + } + fn verify_tls13_signature( + &self, + _: &[u8], + _: &CertificateDer<'_>, + _: &DigitallySignedStruct, + ) -> Result { + Ok(HandshakeSignatureValid::assertion()) + } + fn supported_verify_schemes(&self) -> Vec { + rustls::crypto::ring::default_provider() + .signature_verification_algorithms + .supported_schemes() + } + } + std::sync::Arc::new( + rustls::ClientConfig::builder() + .dangerous() + .with_custom_certificate_verifier(std::sync::Arc::new(NoVerify)) + .with_no_client_auth(), + ) +} + +// ── Wire helpers ──────────────────────────────────────────────── + +fn build_query_vec(domain: &str) -> Vec { + let mut buf = vec![0u8; 512]; + let len = build_query(&mut buf, domain); + buf.truncate(len); + buf +} + fn build_query(buf: &mut [u8], domain: &str) -> usize { let mut pos = 0; buf[pos..pos + 2].copy_from_slice(&0x1234u16.to_be_bytes()); @@ -1626,7 +976,6 @@ fn build_query(buf: &mut [u8], domain: &str) -> usize { pos += 2; buf[pos..pos + 6].fill(0); pos += 6; - for label in domain.split('.') { buf[pos] = label.len() as u8; pos += 1; @@ -1644,6 +993,11 @@ fn build_query(buf: &mut [u8], domain: &str) -> usize { fn flush_cache() { let _ = std::process::Command::new("curl") - .args(["-s", "-X", "DELETE", &format!("http://127.0.0.1:{NUMA_API}/cache")]) + .args([ + "-s", + "-X", + "DELETE", + &format!("http://127.0.0.1:{NUMA_API}/cache"), + ]) .output(); } diff --git a/src/forward.rs b/src/forward.rs index 401ae1c..6afb7e5 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -214,15 +214,11 @@ pub async fn forward_query( upstream: &Upstream, timeout_duration: Duration, ) -> Result { - match upstream { - Upstream::Udp(addr) => forward_udp(query, *addr, timeout_duration).await, - Upstream::Doh { url, client } => forward_doh(query, url, client, timeout_duration).await, - Upstream::Dot { - addr, - tls_name, - connector, - } => forward_dot(query, *addr, tls_name, connector, timeout_duration).await, - } + let mut send_buffer = BytePacketBuffer::new(); + query.write(&mut send_buffer)?; + let data = forward_query_raw(send_buffer.filled(), upstream, timeout_duration).await?; + let mut recv_buffer = BytePacketBuffer::from_bytes(&data); + DnsPacket::from_buffer(&mut recv_buffer) } pub(crate) async fn forward_udp( @@ -284,13 +280,13 @@ pub(crate) async fn forward_tcp( DnsPacket::from_buffer(&mut recv_buffer) } -async fn forward_dot( - query: &DnsPacket, +async fn forward_dot_raw( + wire: &[u8], addr: SocketAddr, tls_name: &Option, connector: &tokio_rustls::TlsConnector, timeout_duration: Duration, -) -> Result { +) -> Result> { use rustls::pki_types::ServerName; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::net::TcpStream; @@ -303,10 +299,6 @@ async fn forward_dot( let tcp = timeout(timeout_duration, TcpStream::connect(addr)).await??; let mut tls = timeout(timeout_duration, connector.connect(server_name, tcp)).await??; - let mut send_buffer = BytePacketBuffer::new(); - query.write(&mut send_buffer)?; - let wire = send_buffer.filled(); - let mut outbuf = Vec::with_capacity(2 + wire.len()); outbuf.extend_from_slice(&(wire.len() as u16).to_be_bytes()); outbuf.extend_from_slice(wire); @@ -319,22 +311,7 @@ async fn forward_dot( let mut data = vec![0u8; resp_len]; timeout(timeout_duration, tls.read_exact(&mut data)).await??; - let mut recv_buffer = BytePacketBuffer::from_bytes(&data); - DnsPacket::from_buffer(&mut recv_buffer) -} - -async fn forward_doh( - query: &DnsPacket, - url: &str, - client: &reqwest::Client, - timeout_duration: Duration, -) -> Result { - let mut send_buffer = BytePacketBuffer::new(); - query.write(&mut send_buffer)?; - - let resp_bytes = forward_doh_raw(send_buffer.filled(), url, client, timeout_duration).await?; - let mut recv_buffer = BytePacketBuffer::from_bytes(&resp_bytes); - DnsPacket::from_buffer(&mut recv_buffer) + Ok(data) } pub async fn forward_query_raw( @@ -345,6 +322,11 @@ pub async fn forward_query_raw( match upstream { Upstream::Udp(addr) => forward_udp_raw(wire, *addr, timeout_duration).await, Upstream::Doh { url, client } => forward_doh_raw(wire, url, client, timeout_duration).await, + Upstream::Dot { + addr, + tls_name, + connector, + } => forward_dot_raw(wire, *addr, tls_name, connector, timeout_duration).await, } } @@ -405,7 +387,10 @@ pub async fn forward_with_hedging_raw( match (primary_err, secondary_err) { (Some(pe), Some(_)) => return Err(pe), - (pe, se) => { primary_err = pe; secondary_err = se; } + (pe, se) => { + primary_err = pe; + secondary_err = se; + } } } } @@ -516,7 +501,7 @@ pub async fn keepalive_doh(upstream: &Upstream) { 0x01, 0x00, // flags: RD=1 0x00, 0x01, // QDCOUNT=1 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // AN=0, NS=0, AR=0 - 0x00, // root name (.) + 0x00, // root name (.) 0x00, 0x02, // type NS 0x00, 0x01, // class IN ]; diff --git a/src/recursive.rs b/src/recursive.rs index 2609f7f..190a57a 100644 --- a/src/recursive.rs +++ b/src/recursive.rs @@ -15,8 +15,8 @@ use crate::srtt::SrttCache; const MAX_REFERRAL_DEPTH: u8 = 10; const MAX_CNAME_DEPTH: u8 = 8; -const NS_QUERY_TIMEOUT: Duration = Duration::from_millis(800); -const TCP_TIMEOUT: Duration = Duration::from_millis(1500); +const NS_QUERY_TIMEOUT: Duration = Duration::from_millis(400); +const TCP_TIMEOUT: Duration = Duration::from_millis(400); const UDP_FAIL_THRESHOLD: u8 = 3; static QUERY_ID: AtomicU16 = AtomicU16::new(1); @@ -213,11 +213,13 @@ pub(crate) fn resolve_iterative<'a>( ns_addrs[ns_idx], q_type, q_name, current_zone, referral_depth ); - let response = match send_query_hedged(q_name, q_type, &ns_addrs[ns_idx..], srtt).await { + let response = match send_query_hedged(q_name, q_type, &ns_addrs[ns_idx..], srtt).await + { Ok(r) => r, Err(e) => { debug!("recursive: NS query failed: {}", e); - ns_idx += 2; // both tried, skip past them + let remaining = ns_addrs.len().saturating_sub(ns_idx); + ns_idx += remaining.min(2); continue; } }; @@ -660,7 +662,10 @@ async fn send_query_hedged( } match (a_err.take(), b_err.take()) { (Some(e), Some(_)) => return Err(e), - (a, b) => { a_err = a; b_err = b; } + (a, b) => { + a_err = a; + b_err = b; + } } } } else { @@ -739,9 +744,13 @@ async fn send_query( "send_query: {} consecutive UDP failures — switching to TCP-first", fails ); + // Now that UDP is disabled, retry this query via TCP + return tcp_with_srtt(&query, server, srtt, start).await; } - debug!("send_query: UDP failed for {}: {}, trying TCP", server, e); - tcp_with_srtt(&query, server, srtt, start).await + // UDP works in general (priming succeeded) but this server timed out. + // Don't waste another 400ms on TCP — the server is unreachable. + srtt.write().unwrap().record_failure(server.ip()); + Err(e) } } } @@ -1021,10 +1030,10 @@ mod tests { } /// TCP-only server returns authoritative answer directly. - /// Verifies: UDP fails → TCP fallback → resolves. + /// Verifies: when UDP is disabled, TCP-first resolves. #[tokio::test] async fn tcp_fallback_resolves_when_udp_blocked() { - UDP_DISABLED.store(false, Ordering::Relaxed); + UDP_DISABLED.store(true, Ordering::Relaxed); UDP_FAILURES.store(0, Ordering::Release); let server_addr = spawn_tcp_dns_server(|query| { @@ -1107,7 +1116,7 @@ mod tests { #[tokio::test] async fn tcp_fallback_handles_nxdomain() { - UDP_DISABLED.store(false, Ordering::Relaxed); + UDP_DISABLED.store(true, Ordering::Relaxed); UDP_FAILURES.store(0, Ordering::Release); let server_addr = spawn_tcp_dns_server(|query| { -- 2.34.1 From c1b651aa636acf9fe582e5809f43e913ce182f88 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 06:25:42 +0300 Subject: [PATCH 015/139] chore: remove obsolete bash benchmark script --- scripts/bench-recursive.sh | 115 ------------------------------------- 1 file changed, 115 deletions(-) delete mode 100755 scripts/bench-recursive.sh diff --git a/scripts/bench-recursive.sh b/scripts/bench-recursive.sh deleted file mode 100755 index 1a1ab71..0000000 --- a/scripts/bench-recursive.sh +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env bash -# Bench: Numa cold-cache recursive resolution vs dig (forwarded through system resolver) -# -# Measures cold-cache recursive resolution time for Numa. -# Flushes Numa's cache before each query to ensure cold-cache. -# Compares against dig querying a public recursive resolver (no cache advantage). -# -# Usage: ./scripts/bench-recursive.sh [numa_port] - -set -euo pipefail - -NUMA_ADDR="${NUMA_ADDR:-127.0.0.1}" -NUMA_PORT="${NUMA_PORT:-${1:-53}}" -API_PORT="${API_PORT:-5380}" -ROUNDS=3 - -DOMAINS=( - "example.com" - "rust-lang.org" - "kernel.org" - "signal.org" - "archlinux.org" - "openbsd.org" - "git-scm.com" - "sqlite.org" - "wireguard.com" - "mozilla.org" -) - -GREEN='\033[0;32m' -AMBER='\033[0;33m' -CYAN='\033[0;36m' -DIM='\033[0;90m' -BOLD='\033[1m' -RESET='\033[0m' - -echo -e "${CYAN}${BOLD}Recursive DNS Resolution Benchmark${RESET}" -echo -e "${DIM}Numa (cold cache, recursive from root) vs dig @1.1.1.1 (public resolver)${RESET}" -echo -e "${DIM}Rounds per domain: ${ROUNDS}${RESET}" -echo "" - -# Verify Numa is reachable -if ! dig @${NUMA_ADDR} -p ${NUMA_PORT} +short +time=3 +tries=1 example.com A &>/dev/null; then - echo -e "${AMBER}Numa not responding on ${NUMA_ADDR}:${NUMA_PORT}${RESET}" >&2 - exit 1 -fi - -# Verify we can flush cache -if ! curl -s -X DELETE "http://${NUMA_ADDR}:${API_PORT}/cache" &>/dev/null; then - echo -e "${AMBER}Cannot flush cache via API at ${NUMA_ADDR}:${API_PORT}${RESET}" >&2 - exit 1 -fi - -measure_ms() { - local start end - start=$(python3 -c 'import time; print(time.time())') - eval "$1" &>/dev/null - end=$(python3 -c 'import time; print(time.time())') - python3 -c "print(round(($end - $start) * 1000, 1))" -} - -printf "${BOLD}%-22s %10s %10s %8s${RESET}\n" "Domain" "Numa (ms)" "1.1.1.1" "Delta" -printf "%-22s %10s %10s %8s\n" "----------------------" "----------" "----------" "--------" - -numa_total=0 -dig_total=0 -count=0 - -for domain in "${DOMAINS[@]}"; do - numa_sum=0 - dig_sum=0 - - for ((r=1; r<=ROUNDS; r++)); do - # Flush Numa cache - curl -s -X DELETE "http://${NUMA_ADDR}:${API_PORT}/cache" &>/dev/null - sleep 0.05 - - # Measure Numa (recursive from root, cold cache) - ms=$(measure_ms "dig @${NUMA_ADDR} -p ${NUMA_PORT} +short +time=10 +tries=1 ${domain} A") - numa_sum=$(python3 -c "print(round($numa_sum + $ms, 1))") - - # Measure dig against 1.1.1.1 (Cloudflare — warm cache, but shows baseline) - ms=$(measure_ms "dig @1.1.1.1 +short +time=10 +tries=1 ${domain} A") - dig_sum=$(python3 -c "print(round($dig_sum + $ms, 1))") - done - - numa_avg=$(python3 -c "print(round($numa_sum / $ROUNDS, 1))") - dig_avg=$(python3 -c "print(round($dig_sum / $ROUNDS, 1))") - delta=$(python3 -c "d = round($numa_avg - $dig_avg, 1); print(f'+{d}' if d > 0 else str(d))") - - # Color the delta - delta_color="$GREEN" - if python3 -c "exit(0 if $numa_avg > $dig_avg * 1.5 else 1)" 2>/dev/null; then - delta_color="$AMBER" - fi - - printf "%-22s %8s ms %8s ms ${delta_color}%6s ms${RESET}\n" "$domain" "$numa_avg" "$dig_avg" "$delta" - - numa_total=$(python3 -c "print(round($numa_total + $numa_avg, 1))") - dig_total=$(python3 -c "print(round($dig_total + $dig_avg, 1))") - count=$((count + 1)) -done - -echo "" -numa_mean=$(python3 -c "print(round($numa_total / $count, 1))") -dig_mean=$(python3 -c "print(round($dig_total / $count, 1))") -delta_mean=$(python3 -c "d = round($numa_mean - $dig_mean, 1); print(f'+{d}' if d > 0 else str(d))") - -printf "${BOLD}%-22s %8s ms %8s ms %6s ms${RESET}\n" "AVERAGE" "$numa_mean" "$dig_mean" "$delta_mean" - -echo "" -echo -e "${DIM}Note: Numa resolves recursively from root hints (cold cache).${RESET}" -echo -e "${DIM}1.1.1.1 serves from Cloudflare's global cache (warm). The comparison${RESET}" -echo -e "${DIM}is intentionally unfair — it shows Numa's worst case vs the best case${RESET}" -echo -e "${DIM}of a global anycast resolver. Cached Numa queries resolve in <1ms.${RESET}" -- 2.34.1 From 72b540a44aadbb34867e2d12ab9c9630015b4b44 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 06:27:38 +0300 Subject: [PATCH 016/139] feat: wire-level cache, serve-stale, raw wire passthrough - Cache stores raw DNS wire bytes + TTL offsets (2.4x memory reduction) - Serve-stale (RFC 8767): expired entries returned with TTL=1 for 1hr - handle_query captures raw_len from recv_from for zero-copy forwarding - resolve_query accepts raw wire bytes, forwards without re-serializing - wire.rs: TTL offset scanner, ID/TTL patching, question extraction - 52 wire tests + 16 cache regression tests --- src/ctx.rs | 34 +++++-- src/wire.rs | 270 ++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 231 insertions(+), 73 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index 2b26a06..46316f2 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -16,9 +16,7 @@ use crate::blocklist::BlocklistStore; use crate::buffer::BytePacketBuffer; use crate::cache::{DnsCache, DnssecStatus}; use crate::config::{UpstreamMode, ZoneMap}; -use crate::forward::{ - forward_query_raw, forward_with_failover_raw, Upstream, UpstreamPool, -}; +use crate::forward::{forward_query_raw, forward_with_failover_raw, Upstream, UpstreamPool}; use crate::header::ResultCode; use crate::health::HealthMeta; use crate::lan::PeerStore; @@ -182,9 +180,7 @@ pub async fn resolve_query( // (e.g. Tailscale .ts.net, VPC private zones) let upstream = Upstream::Udp(fwd_addr); match forward_and_cache(raw_wire, &upstream, ctx, &qname, qtype).await { - Ok(resp) => { - (resp, QueryPath::Forwarded, DnssecStatus::Indeterminate) - } + Ok(resp) => (resp, QueryPath::Forwarded, DnssecStatus::Indeterminate), Err(e) => { error!( "{} | {:?} {} | FORWARD ERROR | {}", @@ -224,17 +220,35 @@ pub async fn resolve_query( (resp, path, DnssecStatus::Indeterminate) } else { let pool = ctx.upstream_pool.lock().unwrap().clone(); - match forward_with_failover_raw(raw_wire, &pool, &ctx.srtt, ctx.timeout, ctx.hedge_delay).await { + match forward_with_failover_raw( + raw_wire, + &pool, + &ctx.srtt, + ctx.timeout, + ctx.hedge_delay, + ) + .await + { Ok(resp_wire) => { ctx.cache.write().unwrap().insert_wire( - &qname, qtype, &resp_wire, DnssecStatus::Indeterminate, + &qname, + qtype, + &resp_wire, + DnssecStatus::Indeterminate, ); let mut buf = BytePacketBuffer::from_bytes(&resp_wire); match DnsPacket::from_buffer(&mut buf) { Ok(resp) => (resp, QueryPath::Forwarded, DnssecStatus::Indeterminate), Err(e) => { - error!("{} | {:?} {} | PARSE ERROR | {}", src_addr, qtype, qname, e); - (DnsPacket::response_from(&query, ResultCode::SERVFAIL), QueryPath::UpstreamError, DnssecStatus::Indeterminate) + error!( + "{} | {:?} {} | PARSE ERROR | {}", + src_addr, qtype, qname, e + ); + ( + DnsPacket::response_from(&query, ResultCode::SERVFAIL), + QueryPath::UpstreamError, + DnssecStatus::Indeterminate, + ) } } } diff --git a/src/wire.rs b/src/wire.rs index 6b68c3a..a93fe27 100644 --- a/src/wire.rs +++ b/src/wire.rs @@ -309,7 +309,11 @@ mod tests { #[test] fn scan_single_a_record() { - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); let wire = to_wire(&pkt); let meta = scan_ttl_offsets(&wire).unwrap(); @@ -341,15 +345,20 @@ mod tests { let ttls: Vec = meta .ttl_offsets .iter() - .map(|&off| u32::from_be_bytes([wire[off], wire[off + 1], wire[off + 2], wire[off + 3]])) + .map(|&off| { + u32::from_be_bytes([wire[off], wire[off + 1], wire[off + 2], wire[off + 3]]) + }) .collect(); assert_eq!(ttls, vec![300, 600, 120]); } #[test] fn scan_mixed_sections() { - let mut pkt = - response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let mut pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); pkt.authorities .push(ns_record("example.com", "ns1.example.com", 3600)); pkt.authorities @@ -382,7 +391,9 @@ mod tests { let ttls: Vec = meta .ttl_offsets .iter() - .map(|&off| u32::from_be_bytes([wire[off], wire[off + 1], wire[off + 2], wire[off + 3]])) + .map(|&off| { + u32::from_be_bytes([wire[off], wire[off + 1], wire[off + 2], wire[off + 3]]) + }) .collect(); assert_eq!(ttls, vec![300, 600]); } @@ -410,15 +421,20 @@ mod tests { let ttls: Vec = meta .ttl_offsets .iter() - .map(|&off| u32::from_be_bytes([wire[off], wire[off + 1], wire[off + 2], wire[off + 3]])) + .map(|&off| { + u32::from_be_bytes([wire[off], wire[off + 1], wire[off + 2], wire[off + 3]]) + }) .collect(); assert_eq!(ttls, vec![300, 600]); } #[test] fn scan_edns_opt_excluded() { - let mut pkt = - response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let mut pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); pkt.edns = Some(EdnsOpt { udp_payload_size: 1232, extended_rcode: 0, @@ -436,8 +452,11 @@ mod tests { #[test] fn scan_rrsig_only_wire_ttl() { - let mut pkt = - response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let mut pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); pkt.answers.push(DnsRecord::RRSIG { domain: "example.com".into(), type_covered: 1, // A @@ -460,8 +479,7 @@ mod tests { // Both wire TTLs should be 300, not 9999 for &off in &meta.ttl_offsets { - let ttl = - u32::from_be_bytes([wire[off], wire[off + 1], wire[off + 2], wire[off + 3]]); + let ttl = u32::from_be_bytes([wire[off], wire[off + 1], wire[off + 2], wire[off + 3]]); assert_eq!(ttl, 300); } @@ -479,8 +497,11 @@ mod tests { #[test] fn scan_nsec_variable_rdata() { - let mut pkt = - response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let mut pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); pkt.authorities.push(DnsRecord::NSEC { domain: "example.com".into(), next_domain: "z.example.com".into(), @@ -534,7 +555,11 @@ mod tests { #[test] fn scan_truncated_wire_returns_error() { - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); let wire = to_wire(&pkt); // Truncate mid-record let truncated = &wire[..wire.len() - 2]; @@ -558,7 +583,11 @@ mod tests { #[test] fn patch_ttl_single() { - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); let mut wire = to_wire(&pkt); let meta = scan_ttl_offsets(&wire).unwrap(); @@ -597,7 +626,11 @@ mod tests { #[test] fn patch_ttl_preserves_other_bytes() { - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); let original = to_wire(&pkt); let mut patched = original.clone(); let meta = scan_ttl_offsets(&patched).unwrap(); @@ -606,10 +639,7 @@ mod tests { // Every byte outside TTL offsets should be identical for (i, (&orig, &patc)) in original.iter().zip(patched.iter()).enumerate() { - let in_ttl = meta - .ttl_offsets - .iter() - .any(|&off| i >= off && i < off + 4); + let in_ttl = meta.ttl_offsets.iter().any(|&off| i >= off && i < off + 4); if !in_ttl { assert_eq!( orig, patc, @@ -622,7 +652,11 @@ mod tests { #[test] fn patch_ttl_zero() { - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); let mut wire = to_wire(&pkt); let meta = scan_ttl_offsets(&wire).unwrap(); @@ -634,7 +668,11 @@ mod tests { #[test] fn patch_ttl_max_u32() { - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); let mut wire = to_wire(&pkt); let meta = scan_ttl_offsets(&wire).unwrap(); @@ -646,8 +684,11 @@ mod tests { #[test] fn patch_ttl_edns_untouched() { - let mut pkt = - response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let mut pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); pkt.edns = Some(EdnsOpt { udp_payload_size: 1232, extended_rcode: 0, @@ -664,10 +705,7 @@ mod tests { // Only the A record's TTL bytes should differ; everything else // (including the OPT "TTL" containing the DO bit) must be unchanged. for (i, (&orig, &patc)) in original.iter().zip(patched.iter()).enumerate() { - let in_ttl = meta - .ttl_offsets - .iter() - .any(|&off| i >= off && i < off + 4); + let in_ttl = meta.ttl_offsets.iter().any(|&off| i >= off && i < off + 4); if !in_ttl { assert_eq!( orig, patc, @@ -682,7 +720,11 @@ mod tests { #[test] fn patch_id_basic() { - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); let mut wire = to_wire(&pkt); patch_id(&mut wire, 0xABCD); @@ -691,7 +733,11 @@ mod tests { #[test] fn patch_id_preserves_flags() { - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); let original = to_wire(&pkt); let mut patched = original.clone(); @@ -703,7 +749,11 @@ mod tests { #[test] fn patch_id_zero() { - let pkt = response(0xFFFF, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0xFFFF, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); let mut wire = to_wire(&pkt); patch_id(&mut wire, 0x0000); @@ -782,7 +832,11 @@ mod tests { #[test] fn round_trip_simple_a() { - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); let wire = to_wire(&pkt); let meta = scan_ttl_offsets(&wire).unwrap(); @@ -808,8 +862,11 @@ mod tests { #[test] fn round_trip_edns_survives() { - let mut pkt = - response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let mut pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); pkt.edns = Some(EdnsOpt { udp_payload_size: 1232, extended_rcode: 0, @@ -1017,7 +1074,11 @@ mod tests { #[test] fn cache_insert_lookup_hit() { let mut cache = DnsCache::new(100, 1, 3600); - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); cache.insert("example.com", QueryType::A, &pkt); let (result, status) = cache @@ -1030,10 +1091,16 @@ mod tests { #[test] fn cache_lookup_adjusts_ttl() { let mut cache = DnsCache::new(100, 1, 3600); - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); cache.insert("example.com", QueryType::A, &pkt); - let (result, _) = cache.lookup_with_status("example.com", QueryType::A).unwrap(); + let (result, _) = cache + .lookup_with_status("example.com", QueryType::A) + .unwrap(); // TTL should be <= 300 (at most original, reduced by elapsed time) assert!(result.answers[0].ttl() <= 300); assert!(result.answers[0].ttl() > 0); @@ -1042,7 +1109,11 @@ mod tests { #[test] fn cache_miss_wrong_domain() { let mut cache = DnsCache::new(100, 1, 3600); - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); cache.insert("example.com", QueryType::A, &pkt); assert!(cache @@ -1053,7 +1124,11 @@ mod tests { #[test] fn cache_miss_wrong_qtype() { let mut cache = DnsCache::new(100, 1, 3600); - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); cache.insert("example.com", QueryType::A, &pkt); assert!(cache @@ -1064,8 +1139,16 @@ mod tests { #[test] fn cache_overwrite_no_double_count() { let mut cache = DnsCache::new(100, 1, 3600); - let pkt1 = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); - let pkt2 = response(0x5678, "example.com", vec![a_record("example.com", "5.6.7.8", 600)]); + let pkt1 = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); + let pkt2 = response( + 0x5678, + "example.com", + vec![a_record("example.com", "5.6.7.8", 600)], + ); cache.insert("example.com", QueryType::A, &pkt1); assert_eq!(cache.len(), 1); @@ -1073,7 +1156,9 @@ mod tests { cache.insert("example.com", QueryType::A, &pkt2); assert_eq!(cache.len(), 1); // no double count - let (result, _) = cache.lookup_with_status("example.com", QueryType::A).unwrap(); + let (result, _) = cache + .lookup_with_status("example.com", QueryType::A) + .unwrap(); match &result.answers[0] { DnsRecord::A { addr, .. } => { assert_eq!(*addr, "5.6.7.8".parse::().unwrap()) @@ -1085,7 +1170,11 @@ mod tests { #[test] fn cache_ttl_clamped_min() { let mut cache = DnsCache::new(100, 60, 3600); - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 5)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 5)], + ); cache.insert("example.com", QueryType::A, &pkt); let (remaining, total) = cache.ttl_remaining("example.com", QueryType::A).unwrap(); @@ -1096,8 +1185,11 @@ mod tests { #[test] fn cache_ttl_clamped_max() { let mut cache = DnsCache::new(100, 1, 3600); - let pkt = - response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 999999)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 999999)], + ); cache.insert("example.com", QueryType::A, &pkt); let (_, total) = cache.ttl_remaining("example.com", QueryType::A).unwrap(); @@ -1110,7 +1202,11 @@ mod tests { assert!(cache.is_empty()); assert_eq!(cache.len(), 0); - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); cache.insert("example.com", QueryType::A, &pkt); assert!(!cache.is_empty()); assert_eq!(cache.len(), 1); @@ -1124,7 +1220,11 @@ mod tests { #[test] fn cache_remove_domain() { let mut cache = DnsCache::new(100, 1, 3600); - let pkt_a = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt_a = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); let pkt_aaaa = response( 0x5678, "example.com", @@ -1143,8 +1243,16 @@ mod tests { #[test] fn cache_list_entries() { let mut cache = DnsCache::new(100, 1, 3600); - let pkt_a = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); - let pkt_b = response(0x5678, "test.org", vec![a_record("test.org", "5.6.7.8", 600)]); + let pkt_a = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); + let pkt_b = response( + 0x5678, + "test.org", + vec![a_record("test.org", "5.6.7.8", 600)], + ); cache.insert("example.com", QueryType::A, &pkt_a); cache.insert("test.org", QueryType::A, &pkt_b); @@ -1160,7 +1268,11 @@ mod tests { let mut cache = DnsCache::new(100, 1, 3600); let empty = cache.heap_bytes(); - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); cache.insert("example.com", QueryType::A, &pkt); assert!(cache.heap_bytes() > empty); } @@ -1173,7 +1285,11 @@ mod tests { assert!(cache.needs_warm("example.com")); // Both A and AAAA cached → does not need warm - let pkt_a = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt_a = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); let pkt_aaaa = response( 0x5678, "example.com", @@ -1194,7 +1310,11 @@ mod tests { let mut cache = DnsCache::new(100, 60, 3600); assert!(cache.ttl_remaining("missing.com", QueryType::A).is_none()); - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); cache.insert("example.com", QueryType::A, &pkt); let (remaining, total) = cache.ttl_remaining("example.com", QueryType::A).unwrap(); assert_eq!(total, 300); @@ -1205,7 +1325,11 @@ mod tests { #[test] fn cache_dnssec_status_preserved() { let mut cache = DnsCache::new(100, 1, 3600); - let pkt = response(0x1234, "example.com", vec![a_record("example.com", "1.2.3.4", 300)]); + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 300)], + ); cache.insert_with_status("example.com", QueryType::A, &pkt, DnssecStatus::Secure); let (_, status) = cache @@ -1225,7 +1349,9 @@ mod tests { let mut cache = DnsCache::new(1000, 1, 3600); // Simulate a realistic cache: 50 domains, mix of record types - let domains: Vec = (0..50).map(|i| format!("domain{}.example.com", i)).collect(); + let domains: Vec = (0..50) + .map(|i| format!("domain{}.example.com", i)) + .collect(); let mut total_wire_bytes = 0usize; let mut total_wire_meta_bytes = 0usize; @@ -1259,8 +1385,7 @@ mod tests { let wire_aaaa = to_wire(&pkt_aaaa); let meta_aaaa = scan_ttl_offsets(&wire_aaaa).unwrap(); total_wire_bytes += wire_aaaa.len(); - total_wire_meta_bytes += - meta_aaaa.ttl_offsets.len() * std::mem::size_of::(); + total_wire_meta_bytes += meta_aaaa.ttl_offsets.len() * std::mem::size_of::(); } } @@ -1300,15 +1425,31 @@ mod tests { // Also measure the struct size difference per entry let parsed_struct = std::mem::size_of::(); - let wire_struct = std::mem::size_of::>() + std::mem::size_of::>() + std::mem::size_of::(); // wire + offsets + answer_count + let wire_struct = std::mem::size_of::>() + + std::mem::size_of::>() + + std::mem::size_of::(); // wire + offsets + answer_count println!(); - println!("=== Cache Memory Footprint Baseline ({} entries) ===", entry_count); + println!( + "=== Cache Memory Footprint Baseline ({} entries) ===", + entry_count + ); println!(); println!("Variable data (heap, per-entry payload):"); - println!(" Parsed (packet.heap_bytes): {} bytes ({:.1}/entry)", parsed_data_bytes, parsed_data_bytes as f64 / entry_count as f64); - println!(" Wire (bytes + TTL offsets): {} bytes ({:.1}/entry)", wire_total, wire_total as f64 / entry_count as f64); - println!(" Ratio: {:.1}x smaller with wire", parsed_data_bytes as f64 / wire_total as f64); + println!( + " Parsed (packet.heap_bytes): {} bytes ({:.1}/entry)", + parsed_data_bytes, + parsed_data_bytes as f64 / entry_count as f64 + ); + println!( + " Wire (bytes + TTL offsets): {} bytes ({:.1}/entry)", + wire_total, + wire_total as f64 / entry_count as f64 + ); + println!( + " Ratio: {:.1}x smaller with wire", + parsed_data_bytes as f64 / wire_total as f64 + ); println!(); println!("Struct overhead (stack, per entry):"); println!(" DnsPacket: {} bytes", parsed_struct); @@ -1319,7 +1460,10 @@ mod tests { let wire_total_per = wire_struct as f64 + wire_total as f64 / entry_count as f64; println!(" Parsed: {:.0} bytes", parsed_total_per); println!(" Wire: {:.0} bytes", wire_total_per); - println!(" Ratio: {:.1}x smaller with wire", parsed_total_per / wire_total_per); + println!( + " Ratio: {:.1}x smaller with wire", + parsed_total_per / wire_total_per + ); println!(); // Assertions -- 2.34.1 From 17a1a6ddba351d8b5ec529ef5ef242e57bcb56ec Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 06:42:59 +0300 Subject: [PATCH 017/139] refactor: remove forward_with_failover duplication, fix warm-branch hedge bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove forward_with_failover (parsed): warm_domain now uses _raw + insert_wire - forward_udp delegates to forward_udp_raw (single UDP socket implementation) - forward_query uses unified _raw path for all protocols - Fix send_query_hedged warm branch: bare select! dropped secondary on primary error instead of waiting for it — now drains both futures like the cold branch - Remove pointless raw_len = len rename --- src/forward.rs | 85 +++++++++--------------------------------------- src/main.rs | 52 +++++++++++++++++------------ src/recursive.rs | 27 +++++++++++++-- 3 files changed, 71 insertions(+), 93 deletions(-) diff --git a/src/forward.rs b/src/forward.rs index 6afb7e5..ebbe777 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -157,58 +157,6 @@ impl UpstreamPool { } } -pub async fn forward_with_failover( - query: &DnsPacket, - pool: &UpstreamPool, - srtt: &RwLock, - timeout_duration: Duration, -) -> Result { - // Build candidate list: primary (sorted by SRTT for UDP) then fallback - let mut candidates: Vec<(usize, u64)> = pool - .primary - .iter() - .enumerate() - .map(|(i, u)| { - let rtt = match u { - Upstream::Udp(addr) => srtt.read().unwrap().get(addr.ip()), - _ => 0, // DoH: keep config order (stable sort preserves it) - }; - (i, rtt) - }) - .collect(); - candidates.sort_by_key(|&(_, rtt)| rtt); - - let all_upstreams: Vec<&Upstream> = candidates - .iter() - .map(|&(i, _)| &pool.primary[i]) - .chain(pool.fallback.iter()) - .collect(); - - let mut last_err: Option> = None; - - for upstream in &all_upstreams { - let start = Instant::now(); - match forward_query(query, upstream, timeout_duration).await { - Ok(resp) => { - if let Upstream::Udp(addr) = upstream { - let rtt_ms = start.elapsed().as_millis() as u64; - srtt.write().unwrap().record_rtt(addr.ip(), rtt_ms, false); - } - return Ok(resp); - } - Err(e) => { - if let Upstream::Udp(addr) = upstream { - srtt.write().unwrap().record_failure(addr.ip()); - } - log::debug!("upstream {} failed: {}", upstream, e); - last_err = Some(e); - } - } - } - - Err(last_err.unwrap_or_else(|| "no upstream configured".into())) -} - pub async fn forward_query( query: &DnsPacket, upstream: &Upstream, @@ -226,24 +174,14 @@ pub(crate) async fn forward_udp( upstream: SocketAddr, timeout_duration: Duration, ) -> Result { - let socket = UdpSocket::bind("0.0.0.0:0").await?; - let mut send_buffer = BytePacketBuffer::new(); query.write(&mut send_buffer)?; - socket.send_to(send_buffer.filled(), upstream).await?; - - let mut recv_buffer = BytePacketBuffer::new(); - let (size, _) = timeout(timeout_duration, socket.recv_from(&mut recv_buffer.buf)).await??; - - if size == recv_buffer.buf.len() { - log::debug!( - "upstream response truncated ({} bytes, buffer {})", - size, - recv_buffer.buf.len() - ); + let data = forward_udp_raw(send_buffer.filled(), upstream, timeout_duration).await?; + if data.len() >= 4096 { + log::debug!("upstream response may be truncated ({} bytes)", data.len()); } - + let mut recv_buffer = BytePacketBuffer::from_bytes(&data); DnsPacket::from_buffer(&mut recv_buffer) } @@ -721,10 +659,19 @@ mod tests { ); let srtt = RwLock::new(SrttCache::new(true)); - let result = forward_with_failover(&query, &pool, &srtt, Duration::from_millis(500)) - .await - .expect("should fail over to second upstream"); + let wire = to_wire(&query); + let resp_wire = forward_with_failover_raw( + &wire, + &pool, + &srtt, + Duration::from_millis(500), + Duration::ZERO, + ) + .await + .expect("should fail over to second upstream"); + let mut buf = BytePacketBuffer::from_bytes(&resp_wire); + let result = DnsPacket::from_buffer(&mut buf).unwrap(); assert_eq!(result.header.id, 0xABCD); assert_eq!(result.answers.len(), 1); } diff --git a/src/main.rs b/src/main.rs index 0211a59..68e4794 100644 --- a/src/main.rs +++ b/src/main.rs @@ -607,11 +607,9 @@ async fn main() -> numa::Result<()> { } Err(e) => return Err(e.into()), }; - let raw_len = len; - let ctx = Arc::clone(&ctx); tokio::spawn(async move { - if let Err(e) = handle_query(buffer, raw_len, src_addr, &ctx).await { + if let Err(e) = handle_query(buffer, len, src_addr, &ctx).await { error!("{} | HANDLER ERROR | {}", src_addr, e); } }); @@ -762,27 +760,39 @@ async fn warm_domain(ctx: &ServerCtx, domain: &str) { use numa::question::QueryType; for qtype in [QueryType::A, QueryType::AAAA] { - let query = numa::packet::DnsPacket::query(0, domain, qtype); - let result = if ctx.upstream_mode == numa::config::UpstreamMode::Recursive { - numa::recursive::resolve_recursive( - domain, - qtype, - &ctx.cache, - &query, - &ctx.root_hints, - &ctx.srtt, + if ctx.upstream_mode == numa::config::UpstreamMode::Recursive { + let query = numa::packet::DnsPacket::query(0, domain, qtype); + match numa::recursive::resolve_recursive( + domain, qtype, &ctx.cache, &query, &ctx.root_hints, &ctx.srtt, ) .await - } else { - let pool = ctx.upstream_pool.lock().unwrap().clone(); - numa::forward::forward_with_failover(&query, &pool, &ctx.srtt, ctx.timeout).await - }; - match result { - Ok(resp) => { - ctx.cache.write().unwrap().insert(domain, qtype, &resp); - log::debug!("cache warm: {} {:?}", domain, qtype); + { + Ok(resp) => { + ctx.cache.write().unwrap().insert(domain, qtype, &resp); + log::debug!("cache warm: {} {:?}", domain, qtype); + } + Err(e) => log::warn!("cache warm: {} {:?} failed: {}", domain, qtype, e), + } + } else { + let query = numa::packet::DnsPacket::query(0, domain, qtype); + let mut buf = numa::buffer::BytePacketBuffer::new(); + if query.write(&mut buf).is_err() { + continue; + } + let pool = ctx.upstream_pool.lock().unwrap().clone(); + match numa::forward::forward_with_failover_raw( + buf.filled(), &pool, &ctx.srtt, ctx.timeout, ctx.hedge_delay, + ) + .await + { + Ok(wire) => { + ctx.cache.write().unwrap().insert_wire( + domain, qtype, &wire, numa::cache::DnssecStatus::Indeterminate, + ); + log::debug!("cache warm: {} {:?}", domain, qtype); + } + Err(e) => log::warn!("cache warm: {} {:?} failed: {}", domain, qtype, e), } - Err(e) => log::warn!("cache warm: {} {:?} failed: {}", domain, qtype, e), } } } diff --git a/src/recursive.rs b/src/recursive.rs index 190a57a..70f35c0 100644 --- a/src/recursive.rs +++ b/src/recursive.rs @@ -690,9 +690,30 @@ async fn send_query_hedged( let fut_b = send_query(qname, qtype, secondary, srtt); tokio::pin!(fut_b); - tokio::select! { - r = fut_a => r, - r = fut_b => r, + // First Ok wins; if one errors, wait for the other. + let mut a_err: Option = None; + let mut b_err: Option = None; + loop { + tokio::select! { + r = &mut fut_a, if a_err.is_none() => { + match r { + Ok(resp) => return Ok(resp), + Err(e) => { + if b_err.is_some() { return Err(e); } + a_err = Some(e); + } + } + } + r = &mut fut_b, if b_err.is_none() => { + match r { + Ok(resp) => return Ok(resp), + Err(e) => { + if let Some(ae) = a_err.take() { return Err(ae); } + b_err = Some(e); + } + } + } + } } } } -- 2.34.1 From f705f8c49fc89d2919ed5f39d95239318ca7814d Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 06:45:10 +0300 Subject: [PATCH 018/139] fix: bump TCP_TIMEOUT to 800ms to fix flaky CI test --- src/recursive.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/recursive.rs b/src/recursive.rs index 70f35c0..0910421 100644 --- a/src/recursive.rs +++ b/src/recursive.rs @@ -16,7 +16,7 @@ use crate::srtt::SrttCache; const MAX_REFERRAL_DEPTH: u8 = 10; const MAX_CNAME_DEPTH: u8 = 8; const NS_QUERY_TIMEOUT: Duration = Duration::from_millis(400); -const TCP_TIMEOUT: Duration = Duration::from_millis(400); +const TCP_TIMEOUT: Duration = Duration::from_millis(800); const UDP_FAIL_THRESHOLD: u8 = 3; static QUERY_ID: AtomicU16 = AtomicU16::new(1); -- 2.34.1 From 700cca9cb616aeecf5d28c52a099f2f134b318ac Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 11:09:20 +0300 Subject: [PATCH 019/139] style: rustfmt warm_domain --- src/main.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/main.rs b/src/main.rs index 68e4794..ebc16cc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -763,7 +763,12 @@ async fn warm_domain(ctx: &ServerCtx, domain: &str) { if ctx.upstream_mode == numa::config::UpstreamMode::Recursive { let query = numa::packet::DnsPacket::query(0, domain, qtype); match numa::recursive::resolve_recursive( - domain, qtype, &ctx.cache, &query, &ctx.root_hints, &ctx.srtt, + domain, + qtype, + &ctx.cache, + &query, + &ctx.root_hints, + &ctx.srtt, ) .await { @@ -781,13 +786,20 @@ async fn warm_domain(ctx: &ServerCtx, domain: &str) { } let pool = ctx.upstream_pool.lock().unwrap().clone(); match numa::forward::forward_with_failover_raw( - buf.filled(), &pool, &ctx.srtt, ctx.timeout, ctx.hedge_delay, + buf.filled(), + &pool, + &ctx.srtt, + ctx.timeout, + ctx.hedge_delay, ) .await { Ok(wire) => { ctx.cache.write().unwrap().insert_wire( - domain, qtype, &wire, numa::cache::DnssecStatus::Indeterminate, + domain, + qtype, + &wire, + numa::cache::DnssecStatus::Indeterminate, ); log::debug!("cache warm: {} {:?}", domain, qtype); } -- 2.34.1 From 67b472fea787227faa99c19a6bab6f24fd981d29 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 11:47:48 +0300 Subject: [PATCH 020/139] fix: serialize tests that share global UDP_DISABLED state The tcp_only_iterative_resolution, tcp_fallback_resolves_when_udp_blocked, tcp_fallback_handles_nxdomain, and udp_auto_disable_resets tests all mutate global UDP_DISABLED / UDP_FAILURES atomics. Under cargo test parallelism, udp_auto_disable_resets would reset the flag mid-flight causing other tests to attempt UDP against TCP-only mock servers and time out. Fix: static Mutex serializes tests that depend on global UDP state. Also: tcp_only_iterative_resolution now calls forward_tcp directly, removing its dependence on the flag entirely. --- src/recursive.rs | 54 ++++++++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 32 deletions(-) diff --git a/src/recursive.rs b/src/recursive.rs index 0910421..53397d2 100644 --- a/src/recursive.rs +++ b/src/recursive.rs @@ -813,6 +813,10 @@ mod tests { use super::*; use std::net::{Ipv4Addr, Ipv6Addr}; + /// Tests that mutate the global UDP_DISABLED / UDP_FAILURES flags must hold + /// this lock to avoid racing with each other under `cargo test` parallelism. + static UDP_STATE_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + #[test] fn extract_ns_from_authority() { let mut pkt = DnsPacket::new(); @@ -1054,6 +1058,7 @@ mod tests { /// Verifies: when UDP is disabled, TCP-first resolves. #[tokio::test] async fn tcp_fallback_resolves_when_udp_blocked() { + let _guard = UDP_STATE_LOCK.lock().unwrap(); UDP_DISABLED.store(true, Ordering::Relaxed); UDP_FAILURES.store(0, Ordering::Release); @@ -1085,49 +1090,32 @@ mod tests { } } - /// Full iterative resolution through TCP-only mock: root referral → authoritative answer. - /// The mock plays both roles (returns referral for NS queries, answer for A queries). + /// TCP round-trip through mock: query → authoritative answer via forward_tcp. + /// Uses forward_tcp directly to avoid dependence on the global UDP_DISABLED flag + /// which is shared across concurrent tests. #[tokio::test] async fn tcp_only_iterative_resolution() { - UDP_DISABLED.store(true, Ordering::Release); // Skip UDP entirely for speed - let server_addr = spawn_tcp_dns_server(|query| { let q = match query.questions.first() { Some(q) => q, None => return DnsPacket::response_from(query, ResultCode::SERVFAIL), }; - if q.qtype == QueryType::NS || q.name == "com" { - // Return referral — NS points back to ourselves (same IP, port 53 in glue - // won't work, but cache will have our address from root_hints) - let mut resp = DnsPacket::new(); - resp.header.id = query.header.id; - resp.header.response = true; - resp.header.rescode = ResultCode::NOERROR; - resp.questions = query.questions.clone(); - resp.authorities.push(DnsRecord::NS { - domain: "com".into(), - host: "ns1.com".into(), - ttl: 3600, - }); - resp - } else { - // Return authoritative answer - let mut resp = DnsPacket::response_from(query, ResultCode::NOERROR); - resp.header.authoritative_answer = true; - resp.answers.push(DnsRecord::A { - domain: q.name.clone(), - addr: Ipv4Addr::new(10, 0, 0, 42), - ttl: 300, - }); - resp - } + let mut resp = DnsPacket::response_from(query, ResultCode::NOERROR); + resp.header.authoritative_answer = true; + resp.answers.push(DnsRecord::A { + domain: q.name.clone(), + addr: Ipv4Addr::new(10, 0, 0, 42), + ttl: 300, + }); + resp }) .await; - let srtt = RwLock::new(SrttCache::new(true)); - let result = send_query("hello.example.com", QueryType::A, server_addr, &srtt).await; - let resp = result.expect("TCP-only send_query should work"); + let query = DnsPacket::query(0x1234, "hello.example.com", QueryType::A); + let resp = crate::forward::forward_tcp(&query, server_addr, TCP_TIMEOUT) + .await + .expect("TCP query should work"); assert_eq!(resp.header.rescode, ResultCode::NOERROR); match &resp.answers[0] { DnsRecord::A { addr, .. } => assert_eq!(*addr, Ipv4Addr::new(10, 0, 0, 42)), @@ -1137,6 +1125,7 @@ mod tests { #[tokio::test] async fn tcp_fallback_handles_nxdomain() { + let _guard = UDP_STATE_LOCK.lock().unwrap(); UDP_DISABLED.store(true, Ordering::Relaxed); UDP_FAILURES.store(0, Ordering::Release); @@ -1169,6 +1158,7 @@ mod tests { #[tokio::test] async fn udp_auto_disable_resets() { + let _guard = UDP_STATE_LOCK.lock().unwrap(); UDP_DISABLED.store(true, Ordering::Release); UDP_FAILURES.store(5, Ordering::Relaxed); -- 2.34.1 From 85cff052a4e4efd513b8ef4eb8f4a3b4dcc923a3 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 12:34:20 +0300 Subject: [PATCH 021/139] fix: restore TCP_TIMEOUT to 400ms (test race was the real issue) --- src/recursive.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/recursive.rs b/src/recursive.rs index 53397d2..a4dff08 100644 --- a/src/recursive.rs +++ b/src/recursive.rs @@ -16,7 +16,7 @@ use crate::srtt::SrttCache; const MAX_REFERRAL_DEPTH: u8 = 10; const MAX_CNAME_DEPTH: u8 = 8; const NS_QUERY_TIMEOUT: Duration = Duration::from_millis(400); -const TCP_TIMEOUT: Duration = Duration::from_millis(800); +const TCP_TIMEOUT: Duration = Duration::from_millis(400); const UDP_FAIL_THRESHOLD: u8 = 3; static QUERY_ID: AtomicU16 = AtomicU16::new(1); -- 2.34.1 From 628ed00074dd423b51c71e46211fecc1f17f1bfb Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 13:08:37 +0300 Subject: [PATCH 022/139] refactor: extract cache_and_parse, remove dead truncation log, restore TCP_TIMEOUT to 400ms --- src/ctx.rs | 53 ++++++++++++++++++++++++-------------------------- src/forward.rs | 4 ---- 2 files changed, 25 insertions(+), 32 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index 46316f2..e1d2d95 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -229,29 +229,17 @@ pub async fn resolve_query( ) .await { - Ok(resp_wire) => { - ctx.cache.write().unwrap().insert_wire( - &qname, - qtype, - &resp_wire, - DnssecStatus::Indeterminate, - ); - let mut buf = BytePacketBuffer::from_bytes(&resp_wire); - match DnsPacket::from_buffer(&mut buf) { - Ok(resp) => (resp, QueryPath::Forwarded, DnssecStatus::Indeterminate), - Err(e) => { - error!( - "{} | {:?} {} | PARSE ERROR | {}", - src_addr, qtype, qname, e - ); - ( - DnsPacket::response_from(&query, ResultCode::SERVFAIL), - QueryPath::UpstreamError, - DnssecStatus::Indeterminate, - ) - } + Ok(resp_wire) => match cache_and_parse(ctx, &qname, qtype, &resp_wire) { + Ok(resp) => (resp, QueryPath::Forwarded, DnssecStatus::Indeterminate), + Err(e) => { + error!("{} | {:?} {} | PARSE ERROR | {}", src_addr, qtype, qname, e); + ( + DnsPacket::response_from(&query, ResultCode::SERVFAIL), + QueryPath::UpstreamError, + DnssecStatus::Indeterminate, + ) } - } + }, Err(e) => { error!( "{} | {:?} {} | UPSTREAM ERROR | {}", @@ -373,6 +361,20 @@ pub async fn resolve_query( Ok(resp_buffer) } +fn cache_and_parse( + ctx: &ServerCtx, + qname: &str, + qtype: QueryType, + resp_wire: &[u8], +) -> crate::Result { + ctx.cache + .write() + .unwrap() + .insert_wire(qname, qtype, resp_wire, DnssecStatus::Indeterminate); + let mut buf = BytePacketBuffer::from_bytes(resp_wire); + DnsPacket::from_buffer(&mut buf) +} + async fn forward_and_cache( wire: &[u8], upstream: &Upstream, @@ -381,12 +383,7 @@ async fn forward_and_cache( qtype: QueryType, ) -> crate::Result { let resp_wire = forward_query_raw(wire, upstream, ctx.timeout).await?; - ctx.cache - .write() - .unwrap() - .insert_wire(qname, qtype, &resp_wire, DnssecStatus::Indeterminate); - let mut buf = BytePacketBuffer::from_bytes(&resp_wire); - DnsPacket::from_buffer(&mut buf) + cache_and_parse(ctx, qname, qtype, &resp_wire) } pub async fn handle_query( diff --git a/src/forward.rs b/src/forward.rs index ebbe777..839ac81 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -176,11 +176,7 @@ pub(crate) async fn forward_udp( ) -> Result { let mut send_buffer = BytePacketBuffer::new(); query.write(&mut send_buffer)?; - let data = forward_udp_raw(send_buffer.filled(), upstream, timeout_duration).await?; - if data.len() >= 4096 { - log::debug!("upstream response may be truncated ({} bytes)", data.len()); - } let mut recv_buffer = BytePacketBuffer::from_bytes(&data); DnsPacket::from_buffer(&mut recv_buffer) } -- 2.34.1 From 15058aea83c4f171e5f7a8160351b87b6b06d9e3 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 18:35:40 +0300 Subject: [PATCH 023/139] bench: add --vs-nextdns, --vs-unbound-cold modes with mode validation - --vs-nextdns: Numa local cache vs NextDNS cloud (45.90.28.0) - --vs-unbound-cold: unique random subdomains, no record cache hits - check_numa_mode validates forward/recursive mode before running - numa-bench-recursive.toml config for cold benchmarks --- benches/numa-bench-recursive.toml | 30 ++++++++++++++ benches/recursive_compare.rs | 66 ++++++++++++++++++++++++++++--- 2 files changed, 91 insertions(+), 5 deletions(-) create mode 100644 benches/numa-bench-recursive.toml diff --git a/benches/numa-bench-recursive.toml b/benches/numa-bench-recursive.toml new file mode 100644 index 0000000..055d75a --- /dev/null +++ b/benches/numa-bench-recursive.toml @@ -0,0 +1,30 @@ +[server] +bind_addr = "127.0.0.1:5454" +api_port = 5381 +api_bind_addr = "127.0.0.1" +data_dir = "/tmp/numa-bench" + +[upstream] +mode = "recursive" +timeout_ms = 10000 + +[cache] +min_ttl = 60 +max_ttl = 3600 + +[blocking] +enabled = false + +[proxy] +port = 8080 +tls_port = 8443 + +[dot] +enabled = true +port = 8530 + +[mobile] +enabled = false + +[lan] +enabled = false diff --git a/benches/recursive_compare.rs b/benches/recursive_compare.rs index 12f3689..dcff2c5 100644 --- a/benches/recursive_compare.rs +++ b/benches/recursive_compare.rs @@ -7,6 +7,8 @@ //! --direct Library-to-library: Numa forward_query_raw vs Hickory resolver.lookup //! --hedge-5x Hedging: single vs hedge-same vs hedge-dual vs Hickory (5 iterations) //! --vs-unbound Server-to-server: Numa vs Unbound (plain UDP, caching) +//! --vs-unbound-cold Cold: Numa vs Unbound (unique subdomains, no cache hits) +//! --vs-nextdns Server-to-cloud: Numa (local cache) vs NextDNS (remote, 45.90.28.0) //! --vs-dot DoT server: Numa vs Unbound //! --vs-doh-servers DoH server: Numa vs Unbound (DoT upstream) //! @@ -145,10 +147,20 @@ fn main() { return run_hedge_multi(&rt, 5); } if arg("--vs-unbound") { - return run_server_comparison(&rt, "Unbound", "127.0.0.1:5456", 5); + check_numa_mode(&rt, "forward"); + return run_server_comparison(&rt, "Unbound", "127.0.0.1:5456", 5, false); + } + if arg("--vs-unbound-cold") { + check_numa_mode(&rt, "recursive"); + return run_server_comparison(&rt, "Unbound", "127.0.0.1:5456", 5, true); } if arg("--vs-dnscrypt") { - return run_server_comparison(&rt, "dnscrypt-proxy", "127.0.0.1:5455", 5); + check_numa_mode(&rt, "forward"); + return run_server_comparison(&rt, "dnscrypt-proxy", "127.0.0.1:5455", 5, false); + } + if arg("--vs-nextdns") { + check_numa_mode(&rt, "forward"); + return run_server_comparison(&rt, "NextDNS", "45.90.28.0:53", 5, false); } if arg("--vs-dot") { return run_dot_comparison(&rt, 5); @@ -380,12 +392,18 @@ fn run_direct(rt: &tokio::runtime::Runtime) { } /// Server-to-server: Numa vs another server, both on plain UDP. +/// When `cold` is true, each query uses a unique random subdomain so neither +/// server can answer from its record cache (NS delegation caching still applies). fn run_server_comparison( rt: &tokio::runtime::Runtime, other_name: &str, other_addr: &str, iterations: usize, + cold: bool, ) { + use std::sync::atomic::{AtomicU64, Ordering}; + static COUNTER: AtomicU64 = AtomicU64::new(0); + let numa_addr: SocketAddr = NUMA_BENCH.parse().unwrap(); let other: SocketAddr = other_addr.parse().unwrap(); @@ -402,19 +420,35 @@ fn run_server_comparison( let _ = rt.block_on(query_udp(other, "example.com")); } + let tag = if cold { + "cold, unique subdomains" + } else { + "caching" + }; + compare_two( rt, - &format!("Server-to-Server: Numa vs {other_name} (UDP, caching)"), + &format!("Server-to-Server: Numa vs {other_name} (UDP, {tag})"), "Numa", other_name, &|domain| { + let d = if cold { + format!("c{}.{}", COUNTER.fetch_add(1, Ordering::Relaxed), domain) + } else { + domain.to_string() + }; let t = Instant::now(); - let _ = rt.block_on(query_udp(numa_addr, domain)); + let _ = rt.block_on(query_udp(numa_addr, &d)); t.elapsed().as_secs_f64() * 1000.0 }, &|domain| { + let d = if cold { + format!("c{}.{}", COUNTER.fetch_add(1, Ordering::Relaxed), domain) + } else { + domain.to_string() + }; let t = Instant::now(); - let _ = rt.block_on(query_udp(other, domain)); + let _ = rt.block_on(query_udp(other, &d)); t.elapsed().as_secs_f64() * 1000.0 }, iterations, @@ -991,6 +1025,28 @@ fn build_query(buf: &mut [u8], domain: &str) -> usize { pos } +fn check_numa_mode(rt: &tokio::runtime::Runtime, expected: &str) { + let url = format!("http://127.0.0.1:{NUMA_API}/stats"); + let resp = match rt.block_on(async { reqwest::get(&url).await?.text().await }) { + Ok(body) => body, + Err(_) => { + eprintln!("Bench Numa not responding on {NUMA_BENCH}"); + eprintln!("Start with: cargo run -- benches/numa-bench.toml"); + std::process::exit(1); + } + }; + let config = if expected == "recursive" { + "benches/numa-bench-recursive.toml" + } else { + "benches/numa-bench.toml" + }; + if !resp.contains(&format!("\"mode\":\"{expected}\"")) { + eprintln!("This benchmark requires Numa in {expected} mode."); + eprintln!("Restart with: cargo run -- {config}"); + std::process::exit(1); + } +} + fn flush_cache() { let _ = std::process::Command::new("curl") .args([ -- 2.34.1 From 05d5a5145f09765f84d714a4964b71a7a28ab34b Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 18:46:03 +0300 Subject: [PATCH 024/139] refactor: remove unused extract_question and read_wire_qname from wire.rs --- src/wire.rs | 130 ++-------------------------------------------------- 1 file changed, 3 insertions(+), 127 deletions(-) diff --git a/src/wire.rs b/src/wire.rs index a93fe27..8d299ce 100644 --- a/src/wire.rs +++ b/src/wire.rs @@ -3,7 +3,6 @@ //! These operate directly on raw DNS wire bytes without full packet parsing, //! enabling zero-copy forwarding and wire-level caching. -use crate::question::QueryType; use crate::Result; /// Metadata extracted from scanning a DNS response's wire bytes. @@ -18,32 +17,6 @@ pub struct WireMeta { pub answer_count: usize, } -/// Extract the first question's (domain, query type) from raw DNS wire bytes. -/// -/// Reads only the 12-byte header + first question section. Returns the lowercased -/// domain name and query type without allocating a full `DnsPacket`. -pub fn extract_question(wire: &[u8]) -> Result<(String, QueryType)> { - if wire.len() < 12 { - return Err("wire too short for DNS header".into()); - } - let qdcount = u16::from_be_bytes([wire[4], wire[5]]); - if qdcount == 0 { - return Err("no questions in wire".into()); - } - - let mut pos = 12; - let mut domain = String::with_capacity(64); - read_wire_qname(wire, &mut pos, &mut domain)?; - - if pos + 4 > wire.len() { - return Err("wire truncated in question section".into()); - } - let qtype = u16::from_be_bytes([wire[pos], wire[pos + 1]]); - // skip QTYPE(2) + QCLASS(2) - - Ok((domain, QueryType::from_num(qtype))) -} - /// Scan a DNS response's wire bytes and return metadata about TTL field locations. /// /// Walks the header, skips the question section, then for each resource record in @@ -155,62 +128,6 @@ pub fn patch_ttls(wire: &mut [u8], offsets: &[usize], new_ttl: u32) { } } -/// Read a DNS name from wire bytes at `pos`, handling compression pointers. -/// Advances `pos` past the name as it appears at the current position -/// (compression pointer targets do NOT advance `pos`). -fn read_wire_qname(wire: &[u8], pos: &mut usize, out: &mut String) -> Result<()> { - let mut jumped = false; - let mut read_pos = *pos; - let mut jumps = 0; - let max_jumps = 20; - - loop { - if read_pos >= wire.len() { - return Err("wire truncated reading name".into()); - } - let len = wire[read_pos] as usize; - - // Compression pointer: top 2 bits set - if len & 0xC0 == 0xC0 { - if read_pos + 1 >= wire.len() { - return Err("wire truncated in compression pointer".into()); - } - if !jumped { - *pos = read_pos + 2; // advance past the pointer - } - let offset = ((len & 0x3F) << 8) | wire[read_pos + 1] as usize; - read_pos = offset; - jumped = true; - jumps += 1; - if jumps > max_jumps { - return Err("too many compression jumps".into()); - } - continue; - } - - if len == 0 { - if !jumped { - *pos = read_pos + 1; - } - break; - } - - if read_pos + 1 + len > wire.len() { - return Err("wire truncated in name label".into()); - } - - if !out.is_empty() { - out.push('.'); - } - for &b in &wire[read_pos + 1..read_pos + 1 + len] { - out.push(b.to_ascii_lowercase() as char); - } - read_pos += 1 + len; - } - - Ok(()) -} - /// Skip a DNS name in wire bytes, advancing `pos` past it. fn skip_wire_name(wire: &[u8], pos: &mut usize) -> Result<()> { loop { @@ -238,7 +155,7 @@ mod tests { use crate::cache::{DnsCache, DnssecStatus}; use crate::header::ResultCode; use crate::packet::{DnsPacket, EdnsOpt}; - use crate::question::DnsQuestion; + use crate::question::{DnsQuestion, QueryType}; use crate::record::DnsRecord; // ── Helpers ────────────────────────────────────────────────────── @@ -760,43 +677,7 @@ mod tests { assert_eq!(&wire[0..2], &[0x00, 0x00]); } - // ── D. extract_question ───────────────────────────────────────── - - #[test] - fn extract_question_basic() { - let pkt = DnsPacket::query(0x1234, "Example.COM", QueryType::A); - let wire = to_wire(&pkt); - let (domain, qtype) = extract_question(&wire).unwrap(); - - assert_eq!(domain, "example.com"); // lowercased - assert_eq!(qtype, QueryType::A); - } - - #[test] - fn extract_question_aaaa() { - let pkt = DnsPacket::query(0x1234, "rust-lang.org", QueryType::AAAA); - let wire = to_wire(&pkt); - let (domain, qtype) = extract_question(&wire).unwrap(); - - assert_eq!(domain, "rust-lang.org"); - assert_eq!(qtype, QueryType::AAAA); - } - - #[test] - fn extract_question_too_short() { - assert!(extract_question(&[0u8; 5]).is_err()); - } - - #[test] - fn extract_question_no_questions() { - let mut wire = to_wire(&DnsPacket::query(0x1234, "example.com", QueryType::A)); - // Zero out QDCOUNT (bytes 4-5) - wire[4] = 0; - wire[5] = 0; - assert!(extract_question(&wire).is_err()); - } - - // ── E. min_ttl_from_wire ──────────────────────────────────────── + // ── D. min_ttl_from_wire ──────────────────────────────────────── #[test] fn min_ttl_answers_only() { @@ -1060,12 +941,7 @@ mod tests { assert!(scan_ttl_offsets(&[]).is_err()); } - #[test] - fn extract_question_rejects_empty_wire() { - assert!(extract_question(&[]).is_err()); - } - - // ── H. Cache behavior tests ───────────────────────────────────── + // ── G. Cache behavior tests ───────────────────────────────────── // // These test existing DnsCache behavior that must be preserved after // the wire-level migration. They use the current parsed-packet API -- 2.34.1 From 043a7e1ba5da32c291709d785f86d1fa668e5994 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 19:23:28 +0300 Subject: [PATCH 025/139] feat: raise cache default to 100K entries, evict stalest instead of dropping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 10K cap was too conservative — the blocklist alone holds 400K domains. At ~100 bytes per wire entry, 100K entries is ~10MB. When the cache is full and evict_expired doesn't free enough slots, evict_stalest removes the entry with the least remaining TTL instead of silently discarding the new insert. --- src/cache.rs | 30 +++++++++++++++++++++++++++++- src/config.rs | 2 +- src/wire.rs | 17 ++++++++++++----- 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index 82795bc..42cea5f 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -100,7 +100,7 @@ impl DnsCache { if self.entry_count >= self.max_entries { self.evict_expired(); if self.entry_count >= self.max_entries { - return; + self.evict_stalest(); } } @@ -260,6 +260,34 @@ impl DnsCache { }); self.entry_count -= count; } + + /// Evict the single entry closest to (or furthest past) expiry. + fn evict_stalest(&mut self) { + let mut worst: Option<(String, QueryType, Duration)> = None; + for (domain, type_map) in &self.entries { + for (qtype, entry) in type_map { + let age = entry.inserted_at.elapsed(); + let remaining = entry.ttl.saturating_sub(age); + match &worst { + None => worst = Some((domain.clone(), *qtype, remaining)), + Some((_, _, w)) if remaining < *w => { + worst = Some((domain.clone(), *qtype, remaining)); + } + _ => {} + } + } + } + if let Some((domain, qtype, _)) = worst { + if let Some(type_map) = self.entries.get_mut(&domain) { + if type_map.remove(&qtype).is_some() { + self.entry_count -= 1; + } + if type_map.is_empty() { + self.entries.remove(&domain); + } + } + } + } } pub struct CacheInfo { diff --git a/src/config.rs b/src/config.rs index 5f9db73..237f3bd 100644 --- a/src/config.rs +++ b/src/config.rs @@ -302,7 +302,7 @@ impl Default for CacheConfig { } fn default_max_entries() -> usize { - 10000 + 100_000 } fn default_min_ttl() -> u32 { 60 diff --git a/src/wire.rs b/src/wire.rs index 8d299ce..6e2c213 100644 --- a/src/wire.rs +++ b/src/wire.rs @@ -1350,18 +1350,25 @@ mod tests { } #[test] - fn cache_max_entries_cap() { + fn cache_max_entries_evicts_stalest() { let mut cache = DnsCache::new(2, 1, 3600); - for i in 0..3 { + // Insert with decreasing TTL so test0.com is stalest + for (i, ttl) in [(0, 60), (1, 3600)] { let domain = format!("test{}.com", i); let pkt = response( i as u16, &domain, - vec![a_record(&domain, &format!("1.2.3.{}", i), 3600)], + vec![a_record(&domain, &format!("1.2.3.{}", i), ttl)], ); cache.insert(&domain, QueryType::A, &pkt); } - // Should not exceed max (third insert is silently dropped or evicts) - assert!(cache.len() <= 2); + assert_eq!(cache.len(), 2); + + // Third insert should evict test0.com (lowest remaining TTL) + let pkt = response(2, "test2.com", vec![a_record("test2.com", "1.2.3.2", 3600)]); + cache.insert("test2.com", QueryType::A, &pkt); + assert_eq!(cache.len(), 2); + assert!(cache.lookup("test0.com", QueryType::A).is_none()); // evicted + assert!(cache.lookup("test2.com", QueryType::A).is_some()); // inserted } } -- 2.34.1 From 571ce2f0133c974517a51f87b4aa754065cb1d14 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 19:42:56 +0300 Subject: [PATCH 026/139] feat: background refresh on stale cache hit (RFC 8767 revalidation) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a cached entry is expired but within the 1-hour stale window, serve it immediately with TTL=1 AND spawn a background re-resolve. The next query gets a fresh entry instead of another stale serve. Without this, stale entries were served repeatedly for up to an hour with no refresh — effectively ignoring TTL. --- src/cache.rs | 9 +++++---- src/ctx.rs | 53 ++++++++++++++++++++++++++++++++++++++++++++++++---- src/doh.rs | 6 +++++- src/dot.rs | 7 +++++-- 4 files changed, 64 insertions(+), 11 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index 42cea5f..5f62cc8 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -132,18 +132,19 @@ impl DnsCache { /// Read-only lookup — expired entries are left in place (cleaned up on insert). pub fn lookup(&self, domain: &str, qtype: QueryType) -> Option { - self.lookup_with_status(domain, qtype).map(|(pkt, _)| pkt) + self.lookup_with_status(domain, qtype) + .map(|(pkt, _, _)| pkt) } pub fn lookup_with_status( &self, domain: &str, qtype: QueryType, - ) -> Option<(DnsPacket, DnssecStatus)> { - let (wire, status, _stale) = self.lookup_wire(domain, qtype, 0)?; + ) -> Option<(DnsPacket, DnssecStatus, bool)> { + let (wire, status, stale) = self.lookup_wire(domain, qtype, 0)?; let mut buf = BytePacketBuffer::from_bytes(&wire); let pkt = DnsPacket::from_buffer(&mut buf).ok()?; - Some((pkt, status)) + Some((pkt, status, stale)) } pub fn insert(&mut self, domain: &str, qtype: QueryType, packet: &DnsPacket) { diff --git a/src/ctx.rs b/src/ctx.rs index e1d2d95..c1f28f2 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use std::net::SocketAddr; use std::path::PathBuf; -use std::sync::{Mutex, RwLock}; +use std::sync::{Arc, Mutex, RwLock}; use std::time::{Duration, Instant, SystemTime}; use arc_swap::ArcSwap; @@ -84,7 +84,7 @@ pub async fn resolve_query( query: DnsPacket, raw_wire: &[u8], src_addr: SocketAddr, - ctx: &ServerCtx, + ctx: &Arc, ) -> crate::Result { let start = Instant::now(); @@ -166,7 +166,12 @@ pub async fn resolve_query( (resp, QueryPath::Blocked, DnssecStatus::Indeterminate) } else { let cached = ctx.cache.read().unwrap().lookup_with_status(&qname, qtype); - if let Some((cached, cached_dnssec)) = cached { + if let Some((cached, cached_dnssec, stale)) = cached { + if stale { + let ctx = Arc::clone(ctx); + let qname = qname.clone(); + tokio::spawn(async move { warm_stale(&ctx, &qname, qtype).await }); + } let mut resp = cached; resp.header.id = query.header.id; if cached_dnssec == DnssecStatus::Secure { @@ -375,6 +380,46 @@ fn cache_and_parse( DnsPacket::from_buffer(&mut buf) } +/// Background refresh for a stale cache entry (RFC 8767 revalidation). +async fn warm_stale(ctx: &ServerCtx, qname: &str, qtype: QueryType) { + let query = DnsPacket::query(0, qname, qtype); + if ctx.upstream_mode == UpstreamMode::Recursive { + if let Ok(resp) = crate::recursive::resolve_recursive( + qname, + qtype, + &ctx.cache, + &query, + &ctx.root_hints, + &ctx.srtt, + ) + .await + { + ctx.cache.write().unwrap().insert(qname, qtype, &resp); + } + } else { + let mut buf = BytePacketBuffer::new(); + if query.write(&mut buf).is_ok() { + let pool = ctx.upstream_pool.lock().unwrap().clone(); + if let Ok(wire) = forward_with_failover_raw( + buf.filled(), + &pool, + &ctx.srtt, + ctx.timeout, + ctx.hedge_delay, + ) + .await + { + ctx.cache.write().unwrap().insert_wire( + qname, + qtype, + &wire, + DnssecStatus::Indeterminate, + ); + } + } + } +} + async fn forward_and_cache( wire: &[u8], upstream: &Upstream, @@ -390,7 +435,7 @@ pub async fn handle_query( mut buffer: BytePacketBuffer, raw_len: usize, src_addr: SocketAddr, - ctx: &ServerCtx, + ctx: &Arc, ) -> crate::Result<()> { let raw_wire = buffer.buf[..raw_len].to_vec(); let query = match DnsPacket::from_buffer(&mut buffer) { diff --git a/src/doh.rs b/src/doh.rs index e31b6fe..bc4ba95 100644 --- a/src/doh.rs +++ b/src/doh.rs @@ -60,7 +60,11 @@ fn is_doh_host(host: Option<&str>, tld: &str) -> bool { } } -async fn resolve_doh(dns_bytes: &[u8], src: SocketAddr, ctx: &ServerCtx) -> Response { +async fn resolve_doh( + dns_bytes: &[u8], + src: SocketAddr, + ctx: &std::sync::Arc, +) -> Response { let mut buffer = BytePacketBuffer::from_bytes(dns_bytes); let query = match DnsPacket::from_buffer(&mut buffer) { Ok(q) => q, diff --git a/src/dot.rs b/src/dot.rs index 4513f60..be22375 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -153,8 +153,11 @@ async fn accept_loop(listener: TcpListener, acceptor: TlsAcceptor, ctx: Arc(mut stream: S, remote_addr: SocketAddr, ctx: &ServerCtx) -where +async fn handle_dot_connection( + mut stream: S, + remote_addr: SocketAddr, + ctx: &std::sync::Arc, +) where S: AsyncReadExt + AsyncWriteExt + Unpin, { loop { -- 2.34.1 From 8ef95383a21c4e2267a9fddc9ccf30861241d6a5 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 19:46:14 +0300 Subject: [PATCH 027/139] feat: prefetch at <10% TTL remaining, add stale behavior tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Entries with <10% TTL remaining are now marked stale on lookup, triggering a background refresh before they expire. Combined with the serve-stale + background refresh from the previous commit, this means entries are proactively refreshed — matching Unbound's prefetch behavior. --- src/cache.rs | 3 ++- src/wire.rs | 55 ++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index 5f62cc8..fb5889b 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -71,7 +71,8 @@ impl DnsCache { let elapsed = entry.inserted_at.elapsed(); let (remaining, stale) = if elapsed < entry.ttl { let secs = (entry.ttl - elapsed).as_secs() as u32; - (secs.max(1), false) + let near_expiry = elapsed * 10 >= entry.ttl * 9; // <10% TTL remaining + (secs.max(1), near_expiry) } else if elapsed < entry.ttl + STALE_WINDOW { (1, true) } else { diff --git a/src/wire.rs b/src/wire.rs index 6e2c213..aa419f2 100644 --- a/src/wire.rs +++ b/src/wire.rs @@ -957,7 +957,7 @@ mod tests { ); cache.insert("example.com", QueryType::A, &pkt); - let (result, status) = cache + let (result, status, _) = cache .lookup_with_status("example.com", QueryType::A) .expect("should hit"); assert_eq!(result.answers.len(), 1); @@ -974,7 +974,7 @@ mod tests { ); cache.insert("example.com", QueryType::A, &pkt); - let (result, _) = cache + let (result, _, _) = cache .lookup_with_status("example.com", QueryType::A) .unwrap(); // TTL should be <= 300 (at most original, reduced by elapsed time) @@ -1032,7 +1032,7 @@ mod tests { cache.insert("example.com", QueryType::A, &pkt2); assert_eq!(cache.len(), 1); // no double count - let (result, _) = cache + let (result, _, _) = cache .lookup_with_status("example.com", QueryType::A) .unwrap(); match &result.answers[0] { @@ -1208,7 +1208,7 @@ mod tests { ); cache.insert_with_status("example.com", QueryType::A, &pkt, DnssecStatus::Secure); - let (_, status) = cache + let (_, status, _) = cache .lookup_with_status("example.com", QueryType::A) .unwrap(); assert_eq!(status, DnssecStatus::Secure); @@ -1371,4 +1371,51 @@ mod tests { assert!(cache.lookup("test0.com", QueryType::A).is_none()); // evicted assert!(cache.lookup("test2.com", QueryType::A).is_some()); // inserted } + + #[test] + fn lookup_wire_signals_stale_when_expired() { + let mut cache = DnsCache::new(100, 1, 1); // max_ttl=1s so entry expires fast + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 1)], // 1s TTL, clamped to min=1 + ); + cache.insert("example.com", QueryType::A, &pkt); + + // Fresh: not stale + let (_, _, stale) = cache.lookup_wire("example.com", QueryType::A, 0).unwrap(); + assert!(!stale); + + // Wait for expiry + std::thread::sleep(std::time::Duration::from_millis(1100)); + + // Expired but within stale window: stale=true + let (_, _, stale) = cache.lookup_wire("example.com", QueryType::A, 0).unwrap(); + assert!(stale); + } + + #[test] + fn lookup_wire_signals_prefetch_near_expiry() { + let mut cache = DnsCache::new(100, 10, 10); // min_ttl=10, max_ttl=10 → entry gets 10s TTL + let pkt = response( + 0x1234, + "example.com", + vec![a_record("example.com", "1.2.3.4", 10)], + ); + cache.insert("example.com", QueryType::A, &pkt); + + // Fresh (>10% remaining): not stale + let (_, _, stale) = cache.lookup_wire("example.com", QueryType::A, 0).unwrap(); + assert!(!stale); + + // Wait until <10% remaining (>9s elapsed of 10s TTL) + std::thread::sleep(std::time::Duration::from_millis(9100)); + + // Still valid but near expiry: stale=true (triggers prefetch) + let result = cache.lookup_wire("example.com", QueryType::A, 0); + if let Some((_, _, stale)) = result { + assert!(stale, "entry at <10% TTL should signal stale for prefetch"); + } + // (entry may have fully expired on slow CI, so we don't assert Some) + } } -- 2.34.1 From 3c49b0e65d643b0c05aa86d5e26c690ff5bf7cb7 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 19:49:23 +0300 Subject: [PATCH 028/139] fix: deduplicate background refresh with per-domain guard Multiple stale queries for the same domain now spawn only one background refresh. A HashSet<(String, QueryType)> on ServerCtx tracks in-flight refreshes; subsequent stale hits for the same key skip the spawn. --- src/api.rs | 1 + src/ctx.rs | 16 ++++++++++++---- src/dot.rs | 1 + src/main.rs | 1 + 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/api.rs b/src/api.rs index e638fba..9aa3f60 100644 --- a/src/api.rs +++ b/src/api.rs @@ -1012,6 +1012,7 @@ mod tests { socket, zone_map: std::collections::HashMap::new(), cache: RwLock::new(crate::cache::DnsCache::new(100, 60, 86400)), + refreshing: Mutex::new(std::collections::HashSet::new()), stats: Mutex::new(crate::stats::ServerStats::new()), overrides: RwLock::new(crate::override_store::OverrideStore::new()), blocklist: RwLock::new(crate::blocklist::BlocklistStore::new()), diff --git a/src/ctx.rs b/src/ctx.rs index c1f28f2..8632a28 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::net::SocketAddr; use std::path::PathBuf; use std::sync::{Arc, Mutex, RwLock}; @@ -35,6 +35,8 @@ pub struct ServerCtx { pub zone_map: ZoneMap, /// std::sync::RwLock (not tokio) — locks must never be held across .await points. pub cache: RwLock, + /// Domains currently being refreshed in the background (dedup guard). + pub refreshing: Mutex>, pub stats: Mutex, pub overrides: RwLock, pub blocklist: RwLock, @@ -168,9 +170,15 @@ pub async fn resolve_query( let cached = ctx.cache.read().unwrap().lookup_with_status(&qname, qtype); if let Some((cached, cached_dnssec, stale)) = cached { if stale { - let ctx = Arc::clone(ctx); - let qname = qname.clone(); - tokio::spawn(async move { warm_stale(&ctx, &qname, qtype).await }); + let key = (qname.clone(), qtype); + let already = !ctx.refreshing.lock().unwrap().insert(key.clone()); + if !already { + let ctx = Arc::clone(ctx); + tokio::spawn(async move { + warm_stale(&ctx, &key.0, key.1).await; + ctx.refreshing.lock().unwrap().remove(&key); + }); + } } let mut resp = cached; resp.header.id = query.header.id; diff --git a/src/dot.rs b/src/dot.rs index be22375..0216dbf 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -357,6 +357,7 @@ mod tests { m }, cache: RwLock::new(crate::cache::DnsCache::new(100, 60, 86400)), + refreshing: Mutex::new(std::collections::HashSet::new()), stats: Mutex::new(crate::stats::ServerStats::new()), overrides: RwLock::new(crate::override_store::OverrideStore::new()), blocklist: RwLock::new(crate::blocklist::BlocklistStore::new()), diff --git a/src/main.rs b/src/main.rs index ebc16cc..9aa3f17 100644 --- a/src/main.rs +++ b/src/main.rs @@ -285,6 +285,7 @@ async fn main() -> numa::Result<()> { config.cache.min_ttl, config.cache.max_ttl, )), + refreshing: Mutex::new(std::collections::HashSet::new()), stats: Mutex::new(ServerStats::new()), overrides: RwLock::new(OverrideStore::new()), blocklist: RwLock::new(blocklist), -- 2.34.1 From 6d9ee14ea6333c510e1972625fa9667a505e4996 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 19:56:42 +0300 Subject: [PATCH 029/139] refactor: unify warm_stale/warm_domain, remove raw_wire alloc, add Freshness enum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Extract refresh_entry in ctx.rs — warm_domain in main.rs now delegates to it instead of duplicating the resolve+cache logic (~40 lines removed) - Eliminate unconditional .to_vec() of raw wire on every UDP/DoT query — pass &buffer.buf[..len] directly (zero-cost for cache hits) - Replace bare bool stale flag with Freshness enum (Fresh/NearExpiry/Stale) making the three states self-documenting at every call site --- src/cache.rs | 38 +++++++++++++++++++++++++++--------- src/ctx.rs | 14 +++++++------- src/dot.rs | 3 +-- src/main.rs | 54 +++++----------------------------------------------- src/wire.rs | 29 ++++++++++++---------------- 5 files changed, 54 insertions(+), 84 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index fb5889b..18fdc19 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -6,6 +6,22 @@ use crate::packet::DnsPacket; use crate::question::QueryType; use crate::wire::WireMeta; +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Freshness { + /// Within TTL, no action needed. + Fresh, + /// Within TTL but <10% remaining — trigger background prefetch. + NearExpiry, + /// Past TTL but within stale window — serve with TTL=1, trigger background refresh. + Stale, +} + +impl Freshness { + pub fn needs_refresh(self) -> bool { + matches!(self, Freshness::NearExpiry | Freshness::Stale) + } +} + #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] pub enum DnssecStatus { Secure, @@ -64,17 +80,21 @@ impl DnsCache { domain: &str, qtype: QueryType, new_id: u16, - ) -> Option<(Vec, DnssecStatus, bool)> { + ) -> Option<(Vec, DnssecStatus, Freshness)> { let type_map = self.entries.get(domain)?; let entry = type_map.get(&qtype)?; let elapsed = entry.inserted_at.elapsed(); - let (remaining, stale) = if elapsed < entry.ttl { + let (remaining, freshness) = if elapsed < entry.ttl { let secs = (entry.ttl - elapsed).as_secs() as u32; - let near_expiry = elapsed * 10 >= entry.ttl * 9; // <10% TTL remaining - (secs.max(1), near_expiry) + let f = if elapsed * 10 >= entry.ttl * 9 { + Freshness::NearExpiry + } else { + Freshness::Fresh + }; + (secs.max(1), f) } else if elapsed < entry.ttl + STALE_WINDOW { - (1, true) + (1, Freshness::Stale) } else { return None; }; @@ -83,7 +103,7 @@ impl DnsCache { crate::wire::patch_id(&mut wire, new_id); crate::wire::patch_ttls(&mut wire, &entry.meta.ttl_offsets, remaining); - Some((wire, entry.dnssec_status, stale)) + Some((wire, entry.dnssec_status, freshness)) } pub fn insert_wire( @@ -141,11 +161,11 @@ impl DnsCache { &self, domain: &str, qtype: QueryType, - ) -> Option<(DnsPacket, DnssecStatus, bool)> { - let (wire, status, stale) = self.lookup_wire(domain, qtype, 0)?; + ) -> Option<(DnsPacket, DnssecStatus, Freshness)> { + let (wire, status, freshness) = self.lookup_wire(domain, qtype, 0)?; let mut buf = BytePacketBuffer::from_bytes(&wire); let pkt = DnsPacket::from_buffer(&mut buf).ok()?; - Some((pkt, status, stale)) + Some((pkt, status, freshness)) } pub fn insert(&mut self, domain: &str, qtype: QueryType, packet: &DnsPacket) { diff --git a/src/ctx.rs b/src/ctx.rs index 8632a28..e97a7ea 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -168,14 +168,14 @@ pub async fn resolve_query( (resp, QueryPath::Blocked, DnssecStatus::Indeterminate) } else { let cached = ctx.cache.read().unwrap().lookup_with_status(&qname, qtype); - if let Some((cached, cached_dnssec, stale)) = cached { - if stale { + if let Some((cached, cached_dnssec, freshness)) = cached { + if freshness.needs_refresh() { let key = (qname.clone(), qtype); let already = !ctx.refreshing.lock().unwrap().insert(key.clone()); if !already { let ctx = Arc::clone(ctx); tokio::spawn(async move { - warm_stale(&ctx, &key.0, key.1).await; + refresh_entry(&ctx, &key.0, key.1).await; ctx.refreshing.lock().unwrap().remove(&key); }); } @@ -388,8 +388,9 @@ fn cache_and_parse( DnsPacket::from_buffer(&mut buf) } -/// Background refresh for a stale cache entry (RFC 8767 revalidation). -async fn warm_stale(ctx: &ServerCtx, qname: &str, qtype: QueryType) { +/// Re-resolve a single (domain, qtype) and update the cache. +/// Used for both stale-entry refresh and proactive cache warming. +pub async fn refresh_entry(ctx: &ServerCtx, qname: &str, qtype: QueryType) { let query = DnsPacket::query(0, qname, qtype); if ctx.upstream_mode == UpstreamMode::Recursive { if let Ok(resp) = crate::recursive::resolve_recursive( @@ -445,7 +446,6 @@ pub async fn handle_query( src_addr: SocketAddr, ctx: &Arc, ) -> crate::Result<()> { - let raw_wire = buffer.buf[..raw_len].to_vec(); let query = match DnsPacket::from_buffer(&mut buffer) { Ok(packet) => packet, Err(e) => { @@ -453,7 +453,7 @@ pub async fn handle_query( return Ok(()); } }; - match resolve_query(query, &raw_wire, src_addr, ctx).await { + match resolve_query(query, &buffer.buf[..raw_len], src_addr, ctx).await { Ok(resp_buffer) => { ctx.socket.send_to(resp_buffer.filled(), src_addr).await?; } diff --git a/src/dot.rs b/src/dot.rs index 0216dbf..d4eeb95 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -180,7 +180,6 @@ async fn handle_dot_connection( break; }; - let raw_wire = buffer.buf[..msg_len].to_vec(); let query = match DnsPacket::from_buffer(&mut buffer) { Ok(q) => q, Err(e) => { @@ -202,7 +201,7 @@ async fn handle_dot_connection( } }; - match resolve_query(query.clone(), &raw_wire, remote_addr, ctx).await { + match resolve_query(query.clone(), &buffer.buf[..msg_len], remote_addr, ctx).await { Ok(resp_buffer) => { if write_framed(&mut stream, resp_buffer.filled()) .await diff --git a/src/main.rs b/src/main.rs index 9aa3f17..1ec7791 100644 --- a/src/main.rs +++ b/src/main.rs @@ -758,55 +758,11 @@ async fn load_blocklists(ctx: &ServerCtx, lists: &[String]) { } async fn warm_domain(ctx: &ServerCtx, domain: &str) { - use numa::question::QueryType; - - for qtype in [QueryType::A, QueryType::AAAA] { - if ctx.upstream_mode == numa::config::UpstreamMode::Recursive { - let query = numa::packet::DnsPacket::query(0, domain, qtype); - match numa::recursive::resolve_recursive( - domain, - qtype, - &ctx.cache, - &query, - &ctx.root_hints, - &ctx.srtt, - ) - .await - { - Ok(resp) => { - ctx.cache.write().unwrap().insert(domain, qtype, &resp); - log::debug!("cache warm: {} {:?}", domain, qtype); - } - Err(e) => log::warn!("cache warm: {} {:?} failed: {}", domain, qtype, e), - } - } else { - let query = numa::packet::DnsPacket::query(0, domain, qtype); - let mut buf = numa::buffer::BytePacketBuffer::new(); - if query.write(&mut buf).is_err() { - continue; - } - let pool = ctx.upstream_pool.lock().unwrap().clone(); - match numa::forward::forward_with_failover_raw( - buf.filled(), - &pool, - &ctx.srtt, - ctx.timeout, - ctx.hedge_delay, - ) - .await - { - Ok(wire) => { - ctx.cache.write().unwrap().insert_wire( - domain, - qtype, - &wire, - numa::cache::DnssecStatus::Indeterminate, - ); - log::debug!("cache warm: {} {:?}", domain, qtype); - } - Err(e) => log::warn!("cache warm: {} {:?} failed: {}", domain, qtype, e), - } - } + for qtype in [ + numa::question::QueryType::A, + numa::question::QueryType::AAAA, + ] { + numa::ctx::refresh_entry(ctx, domain, qtype).await; } } diff --git a/src/wire.rs b/src/wire.rs index aa419f2..3ee2ab3 100644 --- a/src/wire.rs +++ b/src/wire.rs @@ -1374,29 +1374,28 @@ mod tests { #[test] fn lookup_wire_signals_stale_when_expired() { + use crate::cache::Freshness; let mut cache = DnsCache::new(100, 1, 1); // max_ttl=1s so entry expires fast let pkt = response( 0x1234, "example.com", - vec![a_record("example.com", "1.2.3.4", 1)], // 1s TTL, clamped to min=1 + vec![a_record("example.com", "1.2.3.4", 1)], ); cache.insert("example.com", QueryType::A, &pkt); - // Fresh: not stale - let (_, _, stale) = cache.lookup_wire("example.com", QueryType::A, 0).unwrap(); - assert!(!stale); + let (_, _, f) = cache.lookup_wire("example.com", QueryType::A, 0).unwrap(); + assert_eq!(f, Freshness::Fresh); - // Wait for expiry std::thread::sleep(std::time::Duration::from_millis(1100)); - // Expired but within stale window: stale=true - let (_, _, stale) = cache.lookup_wire("example.com", QueryType::A, 0).unwrap(); - assert!(stale); + let (_, _, f) = cache.lookup_wire("example.com", QueryType::A, 0).unwrap(); + assert_eq!(f, Freshness::Stale); } #[test] fn lookup_wire_signals_prefetch_near_expiry() { - let mut cache = DnsCache::new(100, 10, 10); // min_ttl=10, max_ttl=10 → entry gets 10s TTL + use crate::cache::Freshness; + let mut cache = DnsCache::new(100, 10, 10); let pkt = response( 0x1234, "example.com", @@ -1404,18 +1403,14 @@ mod tests { ); cache.insert("example.com", QueryType::A, &pkt); - // Fresh (>10% remaining): not stale - let (_, _, stale) = cache.lookup_wire("example.com", QueryType::A, 0).unwrap(); - assert!(!stale); + let (_, _, f) = cache.lookup_wire("example.com", QueryType::A, 0).unwrap(); + assert_eq!(f, Freshness::Fresh); - // Wait until <10% remaining (>9s elapsed of 10s TTL) std::thread::sleep(std::time::Duration::from_millis(9100)); - // Still valid but near expiry: stale=true (triggers prefetch) let result = cache.lookup_wire("example.com", QueryType::A, 0); - if let Some((_, _, stale)) = result { - assert!(stale, "entry at <10% TTL should signal stale for prefetch"); + if let Some((_, _, f)) = result { + assert_eq!(f, Freshness::NearExpiry); } - // (entry may have fully expired on slow CI, so we don't assert Some) } } -- 2.34.1 From 51848919858053895887afad7510eee7b7d71c24 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 20:43:46 +0300 Subject: [PATCH 030/139] fix: cold benchmark cache-busting with PID prefix and flush Re-runs of --vs-unbound-cold were hitting stale cache entries from prior runs. The static COUNTER reset to 0 each process, generating the same c0.example.com subdomains. With the 1-hour stale window, entries from 10 minutes ago served as stale hits. Fix: prefix with PID (r{pid}-c{n}.domain) and flush Numa's cache before cold benchmarks. --- benches/recursive_compare.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/benches/recursive_compare.rs b/benches/recursive_compare.rs index dcff2c5..8f3b079 100644 --- a/benches/recursive_compare.rs +++ b/benches/recursive_compare.rs @@ -403,6 +403,8 @@ fn run_server_comparison( ) { use std::sync::atomic::{AtomicU64, Ordering}; static COUNTER: AtomicU64 = AtomicU64::new(0); + // Unique prefix per process so re-runs don't hit stale cache entries + let run_id = std::process::id(); let numa_addr: SocketAddr = NUMA_BENCH.parse().unwrap(); let other: SocketAddr = other_addr.parse().unwrap(); @@ -414,6 +416,10 @@ fn run_server_comparison( } } + if cold { + flush_cache(); // flush Numa's record cache + } + println!("Warming up..."); for _ in 0..5 { let _ = rt.block_on(query_udp(numa_addr, "example.com")); @@ -433,7 +439,12 @@ fn run_server_comparison( other_name, &|domain| { let d = if cold { - format!("c{}.{}", COUNTER.fetch_add(1, Ordering::Relaxed), domain) + format!( + "r{}-c{}.{}", + run_id, + COUNTER.fetch_add(1, Ordering::Relaxed), + domain + ) } else { domain.to_string() }; @@ -443,7 +454,12 @@ fn run_server_comparison( }, &|domain| { let d = if cold { - format!("c{}.{}", COUNTER.fetch_add(1, Ordering::Relaxed), domain) + format!( + "r{}-c{}.{}", + run_id, + COUNTER.fetch_add(1, Ordering::Relaxed), + domain + ) } else { domain.to_string() }; -- 2.34.1 From 50828c411a5545ff115ab863c1d5258feae4998b Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 20:54:27 +0300 Subject: [PATCH 031/139] fix: cold benchmark uses 1 round per domain for genuine cold measurements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With ROUNDS=10, only the first query per domain was truly cold — the other 9 hit cached NS delegations at <1ms, diluting the median to 0.4ms. Now cold mode uses 1 round so every sample is a real cold resolve. Also extracted compare_two_rounds to support per-mode rounds. --- benches/recursive_compare.rs | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/benches/recursive_compare.rs b/benches/recursive_compare.rs index 8f3b079..f1a59d2 100644 --- a/benches/recursive_compare.rs +++ b/benches/recursive_compare.rs @@ -183,13 +183,28 @@ fn compare_two( measure_a: &dyn Fn(&str) -> f64, measure_b: &dyn Fn(&str) -> f64, iterations: usize, +) { + compare_two_rounds( + rt, title, name_a, name_b, measure_a, measure_b, iterations, ROUNDS, + ); +} + +fn compare_two_rounds( + rt: &tokio::runtime::Runtime, + title: &str, + name_a: &str, + name_b: &str, + measure_a: &dyn Fn(&str) -> f64, + measure_b: &dyn Fn(&str) -> f64, + iterations: usize, + rounds: usize, ) { let flush = std::env::args().any(|a| a == "--flush"); println!("{}", title); println!( "{} domains × {} rounds × {} iterations\n", DOMAINS.len(), - ROUNDS, + rounds, iterations ); @@ -203,7 +218,7 @@ fn compare_two( let mut b = Vec::new(); for domain in DOMAINS { - for round in 0..ROUNDS { + for round in 0..rounds { if flush { flush_cache(); std::thread::sleep(Duration::from_millis(5)); @@ -230,6 +245,7 @@ fn compare_two( &mut all_a, &mut all_b, iterations, + rounds, ); } @@ -240,6 +256,7 @@ fn print_results( all_a: &mut Vec, all_b: &mut Vec, iterations: usize, + rounds: usize, ) { let w = name_a.len().max(name_b.len()).max(6); @@ -270,7 +287,7 @@ fn print_results( let (a_m, a_med, a_p95, a_p99, a_sd) = stats(all_a); let (b_m, b_med, b_p95, b_p99, b_sd) = stats(all_b); - let total = iterations * DOMAINS.len() * ROUNDS; + let total = iterations * DOMAINS.len() * rounds; println!("\n=== Aggregated ({} samples per method) ===\n", total); println!("{:<10} {:>w$} {:>w$}", "", name_a, name_b, w = w + 3); println!("{:<10} {:>w$.1} ms {:>w$.1} ms", "mean", a_m, b_m, w = w); @@ -432,7 +449,9 @@ fn run_server_comparison( "caching" }; - compare_two( + let rounds = if cold { 1 } else { ROUNDS }; + + compare_two_rounds( rt, &format!("Server-to-Server: Numa vs {other_name} (UDP, {tag})"), "Numa", @@ -468,6 +487,7 @@ fn run_server_comparison( t.elapsed().as_secs_f64() * 1000.0 }, iterations, + rounds, ); } -- 2.34.1 From 02e1449a4544e251be0e74336fb93cda7f3c920e Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 21:34:47 +0300 Subject: [PATCH 032/139] feat: enable request hedging for all upstream protocols Hedging was DoH-only (hyper dispatch spike mitigation). Now applies to UDP (rescues packet loss) and DoT (rescues TLS handshake stalls) too. Same-upstream hedging: fires a second independent request after hedge_ms delay. First response wins. Disable with hedge_ms = 0. --- src/forward.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/forward.rs b/src/forward.rs index 839ac81..e13e360 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -360,9 +360,11 @@ pub async fn forward_with_failover_raw( for upstream in &all_upstreams { let start = Instant::now(); - let result = if !hedge_delay.is_zero() && matches!(upstream, Upstream::Doh { .. }) { - // Hedge against the same upstream: parallel h2 streams on same - // connection. Independent stream scheduling rescues dispatch spikes. + let result = if !hedge_delay.is_zero() { + // Hedge against the same upstream: independent h2 streams (DoH), + // independent UDP packets (plain DNS), or independent TLS + // connections (DoT). Rescues packet loss, dispatch spikes, and + // TLS handshake stalls. forward_with_hedging_raw(wire, upstream, upstream, hedge_delay, timeout_duration).await } else { forward_query_raw(wire, upstream, timeout_duration).await -- 2.34.1 From 8085c1068773ccb3a11ad82a61f7523910ea4b87 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 21:37:59 +0300 Subject: [PATCH 033/139] docs: document hedge_ms, tls:// upstream, update max_entries default in numa.toml --- numa.toml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/numa.toml b/numa.toml index 3b716e8..1ea3341 100644 --- a/numa.toml +++ b/numa.toml @@ -15,9 +15,15 @@ api_port = 5380 # address = "9.9.9.9" # single upstream (plain UDP) # address = ["192.168.1.1", "9.9.9.9:5353"] # multiple upstreams — SRTT picks fastest # address = "https://dns.quad9.net/dns-query" # DNS-over-HTTPS (encrypted) +# address = "tls://9.9.9.9#dns.quad9.net" # DNS-over-TLS (encrypted, port 853) # fallback = ["8.8.8.8", "1.1.1.1"] # tried only when all primaries fail # port = 53 # default port for addresses without :port # timeout_ms = 3000 +# hedge_ms = 10 # request hedging delay (ms). After this delay +# # without a response, fires a parallel request +# # to the same upstream. Rescues packet loss (UDP), +# # dispatch spikes (DoH), TLS stalls (DoT). +# # Set to 0 to disable. Default: 10 # root_hints = [ # only used in recursive mode # "198.41.0.4", # a.root-servers.net (Verisign) # "199.9.14.201", # b.root-servers.net (USC-ISI) @@ -60,7 +66,7 @@ api_port = 5380 # allowlist = ["example.com"] # domains to never block [cache] -max_entries = 10000 +max_entries = 100000 min_ttl = 60 max_ttl = 86400 # warm = ["google.com", "github.com"] # resolve at startup, refresh before TTL expiry -- 2.34.1 From 2101dfcf172b69d52a5319970bf5de183ae284ff Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 22:14:26 +0300 Subject: [PATCH 034/139] feat: transport protocol tracking (UDP/TCP/DoT/DoH) with dashboard visualization Thread Transport enum through resolve pipeline, record per-query transport in stats and query log. Dashboard gets bar chart panel with encryption %, transport column in query log, and filter dropdown. --- site/dashboard.html | 89 +++++++++++++++++++++++++++++++++++++-------- src/api.rs | 17 +++++++++ src/ctx.rs | 9 +++-- src/doh.rs | 3 +- src/dot.rs | 11 +++++- src/main.rs | 4 +- src/query_log.rs | 4 +- src/stats.rs | 43 +++++++++++++++++++++- 8 files changed, 156 insertions(+), 24 deletions(-) diff --git a/site/dashboard.html b/site/dashboard.html index 5fa9777..2d9cc60 100644 --- a/site/dashboard.html +++ b/site/dashboard.html @@ -223,6 +223,10 @@ body { .path-bar-fill.override { background: var(--emerald); } .path-bar-fill.error { background: var(--rose); } .path-bar-fill.blocked { background: var(--text-dim); } +.path-bar-fill.udp { background: var(--text-dim); } +.path-bar-fill.tcp { background: var(--violet); } +.path-bar-fill.dot { background: var(--emerald); } +.path-bar-fill.doh { background: var(--teal); } .path-pct { font-family: var(--font-mono); font-size: 0.75rem; @@ -288,6 +292,10 @@ body { .path-tag.SERVFAIL { background: rgba(181, 68, 58, 0.12); color: var(--rose); } .path-tag.BLOCKED { background: rgba(163, 152, 136, 0.15); color: var(--text-dim); } .path-tag.COALESCED { background: rgba(138, 104, 158, 0.12); color: var(--violet-dim); } +.path-tag.UDP { background: rgba(163, 152, 136, 0.15); color: var(--text-dim); } +.path-tag.TCP { background: rgba(100, 116, 139, 0.12); color: var(--violet-dim); } +.path-tag.DOT { background: rgba(82, 122, 82, 0.12); color: var(--emerald); } +.path-tag.DOH { background: rgba(107, 124, 78, 0.12); color: var(--teal); } .src-tag { font-size: 0.6rem; color: var(--text-dim); letter-spacing: 0.02em; } /* Sidebar panels */ @@ -622,6 +630,16 @@ body { + +
+
+ Transport + +
+
+
+
+
@@ -643,6 +661,14 @@ body { +
@@ -654,6 +680,7 @@ body { Type Domain Path + Transport Result Latency @@ -907,6 +934,27 @@ function renderMemory(mem, stats) { `; } +function renderBarChart(containerId, defs, data, total) { + total = total || 1; + document.getElementById(containerId).innerHTML = defs.map(d => { + const count = data[d.key] || 0; + const pct = ((count / total) * 100).toFixed(1); + return ` +
+ ${d.label} +
+
+
+ ${pct}% +
`; + }).join(''); +} + +function encryptionPct(transport) { + const total = (transport.udp + transport.tcp + transport.dot + transport.doh) || 1; + return (((transport.dot + transport.doh) / total) * 100).toFixed(0); +} + const PATH_DEFS = [ { key: 'forwarded', label: 'Forward', cls: 'forward' }, { key: 'recursive', label: 'Recursive', cls: 'recursive' }, @@ -918,20 +966,23 @@ const PATH_DEFS = [ ]; function renderPaths(queries) { - const total = queries.total || 1; - const container = document.getElementById('pathBars'); - container.innerHTML = PATH_DEFS.map(p => { - const count = queries[p.key] || 0; - const pct = ((count / total) * 100).toFixed(1); - return ` -
- ${p.label} -
-
-
- ${pct}% -
`; - }).join(''); + renderBarChart('pathBars', PATH_DEFS, queries, queries.total); +} + +const TRANSPORT_DEFS = [ + { key: 'udp', label: 'UDP', cls: 'udp' }, + { key: 'tcp', label: 'TCP', cls: 'tcp' }, + { key: 'dot', label: 'DoT', cls: 'dot' }, + { key: 'doh', label: 'DoH', cls: 'doh' }, +]; + +function renderTransport(transport) { + const total = (transport.udp + transport.tcp + transport.dot + transport.doh) || 1; + renderBarChart('transportBars', TRANSPORT_DEFS, transport, total); + const encPct = encryptionPct(transport); + const el = document.getElementById('transportEncrypted'); + el.textContent = `${encPct}% encrypted`; + el.style.color = encPct >= 80 ? 'var(--emerald)' : encPct >= 50 ? 'var(--amber)' : 'var(--rose)'; } function renderQueryLog(entries) { @@ -942,6 +993,7 @@ function renderQueryLog(entries) { function applyLogFilter() { const domainFilter = document.getElementById('logFilterDomain').value.trim().toLowerCase(); const pathFilter = document.getElementById('logFilterPath').value; + const transportFilter = document.getElementById('logFilterTransport').value; let filtered = lastLogEntries; if (domainFilter) { @@ -950,6 +1002,9 @@ function applyLogFilter() { if (pathFilter) { filtered = filtered.filter(e => e.path === pathFilter); } + if (transportFilter) { + filtered = filtered.filter(e => e.transport === transportFilter); + } const tbody = document.getElementById('queryLogBody'); document.getElementById('queryCount').textContent = @@ -967,6 +1022,7 @@ function applyLogFilter() { ${e.query_type} ${e.domain}${allowBtn} ${e.path} + ${e.transport} ${e.dnssec === 'secure' ? '' : ''}${e.rescode} ${e.latency_ms.toFixed(1)}ms `; @@ -1141,11 +1197,13 @@ async function refresh() { // QPS calculation const now = Date.now(); + const encPct = encryptionPct(stats.transport); if (prevTotal !== null && prevTime !== null) { const dt = (now - prevTime) / 1000; const dq = q.total - prevTotal; const qps = dt > 0 ? (dq / dt).toFixed(1) : '0.0'; - document.getElementById('qps').textContent = `~${qps}/s`; + const encTag = q.total > 0 ? ` · ${encPct}% enc` : ''; + document.getElementById('qps').textContent = `~${qps}/s${encTag}`; } prevTotal = q.total; prevTime = now; @@ -1157,6 +1215,7 @@ async function refresh() { // Panels renderPaths(q); + renderTransport(stats.transport); renderQueryLog(logs); renderOverrides(overrides); renderCache(cache); diff --git a/src/api.rs b/src/api.rs index 9aa3f60..fcc0bd9 100644 --- a/src/api.rs +++ b/src/api.rs @@ -152,6 +152,7 @@ struct QueryLogResponse { domain: String, query_type: String, path: String, + transport: String, rescode: String, latency_ms: f64, dnssec: String, @@ -167,6 +168,7 @@ struct StatsResponse { dnssec: bool, srtt: bool, queries: QueriesStats, + transport: TransportStats, cache: CacheStats, overrides: OverrideStats, blocking: BlockingStatsResponse, @@ -175,6 +177,14 @@ struct StatsResponse { memory: MemoryStats, } +#[derive(Serialize)] +struct TransportStats { + udp: u64, + tcp: u64, + dot: u64, + doh: u64, +} + #[derive(Serialize)] struct MobileStatsResponse { enabled: bool, @@ -483,6 +493,7 @@ async fn query_log( domain: e.domain.clone(), query_type: e.query_type.as_str().to_string(), path: e.path.as_str().to_string(), + transport: e.transport.as_str().to_string(), rescode: e.rescode.as_str().to_string(), latency_ms: e.latency_us as f64 / 1000.0, dnssec: e.dnssec.as_str().to_string(), @@ -545,6 +556,12 @@ async fn stats(State(ctx): State>) -> Json { blocked: snap.blocked, errors: snap.errors, }, + transport: TransportStats { + udp: snap.transport_udp, + tcp: snap.transport_tcp, + dot: snap.transport_dot, + doh: snap.transport_doh, + }, cache: CacheStats { entries: cache_len, max_entries: cache_max, diff --git a/src/ctx.rs b/src/ctx.rs index e97a7ea..65b76d3 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -27,7 +27,7 @@ use crate::question::QueryType; use crate::record::DnsRecord; use crate::service_store::ServiceStore; use crate::srtt::SrttCache; -use crate::stats::{QueryPath, ServerStats}; +use crate::stats::{QueryPath, ServerStats, Transport}; use crate::system_dns::ForwardingRule; pub struct ServerCtx { @@ -87,6 +87,7 @@ pub async fn resolve_query( raw_wire: &[u8], src_addr: SocketAddr, ctx: &Arc, + transport: Transport, ) -> crate::Result { let start = Instant::now(); @@ -354,7 +355,7 @@ pub async fn resolve_query( // Record stats and query log { let mut s = ctx.stats.lock().unwrap(); - let total = s.record(path); + let total = s.record(path, transport); if total.is_multiple_of(1000) { s.log_summary(); } @@ -366,6 +367,7 @@ pub async fn resolve_query( domain: qname, query_type: qtype, path, + transport, rescode: response.header.rescode, latency_us: elapsed.as_micros() as u64, dnssec, @@ -445,6 +447,7 @@ pub async fn handle_query( raw_len: usize, src_addr: SocketAddr, ctx: &Arc, + transport: Transport, ) -> crate::Result<()> { let query = match DnsPacket::from_buffer(&mut buffer) { Ok(packet) => packet, @@ -453,7 +456,7 @@ pub async fn handle_query( return Ok(()); } }; - match resolve_query(query, &buffer.buf[..raw_len], src_addr, ctx).await { + match resolve_query(query, &buffer.buf[..raw_len], src_addr, ctx, transport).await { Ok(resp_buffer) => { ctx.socket.send_to(resp_buffer.filled(), src_addr).await?; } diff --git a/src/doh.rs b/src/doh.rs index bc4ba95..7325688 100644 --- a/src/doh.rs +++ b/src/doh.rs @@ -10,6 +10,7 @@ use crate::buffer::BytePacketBuffer; use crate::ctx::{resolve_query, ServerCtx}; use crate::header::ResultCode; use crate::packet::DnsPacket; +use crate::stats::Transport; const MAX_DNS_MSG: usize = 4096; const DOH_CONTENT_TYPE: &str = "application/dns-message"; @@ -86,7 +87,7 @@ async fn resolve_doh( let query_rd = query.header.recursion_desired; let questions = query.questions.clone(); - match resolve_query(query, dns_bytes, src, ctx).await { + match resolve_query(query, dns_bytes, src, ctx, Transport::Doh).await { Ok(resp_buffer) => { let min_ttl = extract_min_ttl(resp_buffer.filled()); dns_response(resp_buffer.filled(), min_ttl) diff --git a/src/dot.rs b/src/dot.rs index d4eeb95..e883e0b 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -15,6 +15,7 @@ use crate::config::DotConfig; use crate::ctx::{resolve_query, ServerCtx}; use crate::header::ResultCode; use crate::packet::DnsPacket; +use crate::stats::Transport; const MAX_CONNECTIONS: usize = 512; const IDLE_TIMEOUT: Duration = Duration::from_secs(30); @@ -201,7 +202,15 @@ async fn handle_dot_connection( } }; - match resolve_query(query.clone(), &buffer.buf[..msg_len], remote_addr, ctx).await { + match resolve_query( + query.clone(), + &buffer.buf[..msg_len], + remote_addr, + ctx, + Transport::Dot, + ) + .await + { Ok(resp_buffer) => { if write_framed(&mut stream, resp_buffer.filled()) .await diff --git a/src/main.rs b/src/main.rs index 1ec7791..bce7add 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,7 +15,7 @@ use numa::forward::{parse_upstream, Upstream, UpstreamPool}; use numa::override_store::OverrideStore; use numa::query_log::QueryLog; use numa::service_store::ServiceStore; -use numa::stats::ServerStats; +use numa::stats::{ServerStats, Transport}; use numa::system_dns::{ discover_system_dns, install_service, restart_service, service_status, uninstall_service, }; @@ -610,7 +610,7 @@ async fn main() -> numa::Result<()> { }; let ctx = Arc::clone(&ctx); tokio::spawn(async move { - if let Err(e) = handle_query(buffer, len, src_addr, &ctx).await { + if let Err(e) = handle_query(buffer, len, src_addr, &ctx, Transport::Udp).await { error!("{} | HANDLER ERROR | {}", src_addr, e); } }); diff --git a/src/query_log.rs b/src/query_log.rs index 1dc2d17..8ce4a6e 100644 --- a/src/query_log.rs +++ b/src/query_log.rs @@ -5,7 +5,7 @@ use std::time::SystemTime; use crate::cache::DnssecStatus; use crate::header::ResultCode; use crate::question::QueryType; -use crate::stats::QueryPath; +use crate::stats::{QueryPath, Transport}; pub struct QueryLogEntry { pub timestamp: SystemTime, @@ -13,6 +13,7 @@ pub struct QueryLogEntry { pub domain: String, pub query_type: QueryType, pub path: QueryPath, + pub transport: Transport, pub rescode: ResultCode, pub latency_us: u64, pub dnssec: DnssecStatus, @@ -107,6 +108,7 @@ mod tests { domain: "example.com".into(), query_type: QueryType::A, path: QueryPath::Forwarded, + transport: Transport::Udp, rescode: ResultCode::NOERROR, latency_us: 500, dnssec: DnssecStatus::Indeterminate, diff --git a/src/stats.rs b/src/stats.rs index c1a176f..feae945 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -97,9 +97,32 @@ pub struct ServerStats { queries_local: u64, queries_overridden: u64, upstream_errors: u64, + transport_udp: u64, + transport_tcp: u64, + transport_dot: u64, + transport_doh: u64, started_at: Instant, } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Transport { + Udp, + Tcp, + Dot, + Doh, +} + +impl Transport { + pub fn as_str(&self) -> &'static str { + match self { + Transport::Udp => "UDP", + Transport::Tcp => "TCP", + Transport::Dot => "DOT", + Transport::Doh => "DOH", + } + } +} + #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum QueryPath { Local, @@ -167,11 +190,15 @@ impl ServerStats { queries_local: 0, queries_overridden: 0, upstream_errors: 0, + transport_udp: 0, + transport_tcp: 0, + transport_dot: 0, + transport_doh: 0, started_at: Instant::now(), } } - pub fn record(&mut self, path: QueryPath) -> u64 { + pub fn record(&mut self, path: QueryPath, transport: Transport) -> u64 { self.queries_total += 1; match path { QueryPath::Local => self.queries_local += 1, @@ -183,6 +210,12 @@ impl ServerStats { QueryPath::Overridden => self.queries_overridden += 1, QueryPath::UpstreamError => self.upstream_errors += 1, } + match transport { + Transport::Udp => self.transport_udp += 1, + Transport::Tcp => self.transport_tcp += 1, + Transport::Dot => self.transport_dot += 1, + Transport::Doh => self.transport_doh += 1, + } self.queries_total } @@ -206,6 +239,10 @@ impl ServerStats { overridden: self.queries_overridden, blocked: self.queries_blocked, errors: self.upstream_errors, + transport_udp: self.transport_udp, + transport_tcp: self.transport_tcp, + transport_dot: self.transport_dot, + transport_doh: self.transport_doh, } } @@ -242,4 +279,8 @@ pub struct StatsSnapshot { pub overridden: u64, pub blocked: u64, pub errors: u64, + pub transport_udp: u64, + pub transport_tcp: u64, + pub transport_dot: u64, + pub transport_doh: u64, } -- 2.34.1 From 3665deb56bd8e85f1f7fb569ba8ed0944838978c Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 17:56:39 +0300 Subject: [PATCH 035/139] fix: accept loopback addresses for DoH and add IP SANs to TLS cert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DoH endpoint rejected requests with Host: 127.0.0.1/::1/localhost, and the generated TLS cert had no IP SANs — so browsers couldn't use https://127.0.0.1/dns-query even with the CA trusted. - is_doh_host now accepts 127.0.0.1, ::1, localhost (with optional port) - TLS cert includes 127.0.0.1 and ::1 IP SANs, plus bare TLD DNS SAN Closes #87 --- src/doh.rs | 33 +++++++++++++++++++++++---------- src/tls.rs | 14 ++++++++++++++ 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/doh.rs b/src/doh.rs index 7325688..917e039 100644 --- a/src/doh.rs +++ b/src/doh.rs @@ -49,16 +49,25 @@ pub async fn doh_post(State(state): State, req: Request) } fn is_doh_host(host: Option<&str>, tld: &str) -> bool { - match host { - Some(h) if h == tld => true, - Some(h) => { - h.len() == 2 * tld.len() + 1 - && h.starts_with(tld) - && h.as_bytes().get(tld.len()) == Some(&b'.') - && h.ends_with(tld) - } - None => false, - } + let h = match host { + Some(h) => h, + None => return false, + }; + is_doh_name(h, tld) + || h.rsplit_once(':').is_some_and(|(base, port)| { + port.bytes().all(|b| b.is_ascii_digit()) && is_doh_name(base, tld) + }) +} + +fn is_doh_name(h: &str, tld: &str) -> bool { + h == tld + || (h.len() == 2 * tld.len() + 1 + && h.starts_with(tld) + && h.as_bytes().get(tld.len()) == Some(&b'.') + && h.ends_with(tld)) + || h == "127.0.0.1" + || h == "::1" + || h == "localhost" } async fn resolve_doh( @@ -148,6 +157,10 @@ mod tests { fn is_doh_host_matches_tld() { assert!(is_doh_host(Some("numa"), "numa")); assert!(is_doh_host(Some("numa.numa"), "numa")); + assert!(is_doh_host(Some("127.0.0.1"), "numa")); + assert!(is_doh_host(Some("127.0.0.1:443"), "numa")); + assert!(is_doh_host(Some("::1"), "numa")); + assert!(is_doh_host(Some("localhost"), "numa")); assert!(!is_doh_host(Some("foo.numa"), "numa")); assert!(!is_doh_host(None, "numa")); } diff --git a/src/tls.rs b/src/tls.rs index e9e2f59..2443f4f 100644 --- a/src/tls.rs +++ b/src/tls.rs @@ -186,6 +186,20 @@ fn generate_service_cert( } } + // Loopback IP SANs so browsers can reach DoH at https://127.0.0.1/dns-query + sans.push(SanType::IpAddress(std::net::IpAddr::V4( + std::net::Ipv4Addr::LOCALHOST, + ))); + sans.push(SanType::IpAddress(std::net::IpAddr::V6( + std::net::Ipv6Addr::LOCALHOST, + ))); + + // Bare TLD (e.g. "numa") for DoH via https://numa/dns-query + match tld.to_string().try_into() { + Ok(ia5) => sans.push(SanType::DnsName(ia5)), + Err(e) => warn!("invalid SAN {}: {}", tld, e), + } + if sans.is_empty() { return Err("no valid service names for TLS cert".into()); } -- 2.34.1 From 115a55b199ff02ca09acda4b4549ddc12742f847 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 22:26:44 +0300 Subject: [PATCH 036/139] fix: bracketed IPv6, localhost SAN, split host-check helpers - is_doh_host split into strip_port + is_loopback_host + is_tld_match - strip_port handles bracketed IPv6 ([::1]:443) and rejects bare IPv6 - Add [::1] to accepted loopback hosts, add localhost DNS SAN to cert - Remove dead sans.is_empty() guard (loopback IPs always present) --- src/doh.rs | 37 +++++++++++++++++++++++++++++-------- src/tls.rs | 13 +++++-------- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/src/doh.rs b/src/doh.rs index 917e039..672402b 100644 --- a/src/doh.rs +++ b/src/doh.rs @@ -53,21 +53,39 @@ fn is_doh_host(host: Option<&str>, tld: &str) -> bool { Some(h) => h, None => return false, }; - is_doh_name(h, tld) - || h.rsplit_once(':').is_some_and(|(base, port)| { - port.bytes().all(|b| b.is_ascii_digit()) && is_doh_name(base, tld) - }) + let base = strip_port(h).unwrap_or(h); + is_loopback_host(base) || is_tld_match(base, tld) } -fn is_doh_name(h: &str, tld: &str) -> bool { +fn strip_port(h: &str) -> Option<&str> { + if h.starts_with('[') { + // [::1]:443 → [::1] + let (base, port) = h.rsplit_once("]:")?; + port.bytes() + .all(|b| b.is_ascii_digit()) + .then(|| &h[..base.len() + 1]) + } else { + let (base, port) = h.rsplit_once(':')?; + // Bare IPv6 like "::1" has multiple colons — not a port suffix + if base.contains(':') { + return None; + } + port.bytes() + .all(|b| b.is_ascii_digit()) + .then_some(base) + } +} + +fn is_loopback_host(h: &str) -> bool { + matches!(h, "127.0.0.1" | "::1" | "[::1]" | "localhost") +} + +fn is_tld_match(h: &str, tld: &str) -> bool { h == tld || (h.len() == 2 * tld.len() + 1 && h.starts_with(tld) && h.as_bytes().get(tld.len()) == Some(&b'.') && h.ends_with(tld)) - || h == "127.0.0.1" - || h == "::1" - || h == "localhost" } async fn resolve_doh( @@ -160,7 +178,10 @@ mod tests { assert!(is_doh_host(Some("127.0.0.1"), "numa")); assert!(is_doh_host(Some("127.0.0.1:443"), "numa")); assert!(is_doh_host(Some("::1"), "numa")); + assert!(is_doh_host(Some("[::1]"), "numa")); + assert!(is_doh_host(Some("[::1]:443"), "numa")); assert!(is_doh_host(Some("localhost"), "numa")); + assert!(is_doh_host(Some("localhost:443"), "numa")); assert!(!is_doh_host(Some("foo.numa"), "numa")); assert!(!is_doh_host(None, "numa")); } diff --git a/src/tls.rs b/src/tls.rs index 2443f4f..9167904 100644 --- a/src/tls.rs +++ b/src/tls.rs @@ -194,14 +194,11 @@ fn generate_service_cert( std::net::Ipv6Addr::LOCALHOST, ))); - // Bare TLD (e.g. "numa") for DoH via https://numa/dns-query - match tld.to_string().try_into() { - Ok(ia5) => sans.push(SanType::DnsName(ia5)), - Err(e) => warn!("invalid SAN {}: {}", tld, e), - } - - if sans.is_empty() { - return Err("no valid service names for TLS cert".into()); + for name in ["localhost", tld] { + match name.to_string().try_into() { + Ok(ia5) => sans.push(SanType::DnsName(ia5)), + Err(e) => warn!("invalid SAN {}: {}", name, e), + } } params.subject_alt_names = sans; -- 2.34.1 From bd505813b6f3d852e7955bc07c0b986dcb54d387 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 23:42:45 +0300 Subject: [PATCH 037/139] test: verify TLS cert SANs (wildcard, services, loopback, localhost, bare TLD) Parse the generated DER cert with x509-parser to assert the exact SAN set, catching silent try_into() failures that a params-level test would miss. --- Cargo.toml | 1 + src/tls.rs | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index d7f6f9f..6ab0972 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,6 +39,7 @@ tower = { version = "0.5", features = ["util"] } http = "1" hickory-resolver = { version = "0.25", features = ["https-ring", "webpki-roots"] } hickory-proto = "0.25" +x509-parser = "0.18" [[bench]] name = "hot_path" diff --git a/src/tls.rs b/src/tls.rs index 9167904..22a00a4 100644 --- a/src/tls.rs +++ b/src/tls.rs @@ -251,4 +251,72 @@ mod tests { let err: crate::Error = "rcgen failure".into(); assert!(try_data_dir_advisory(&err, &PathBuf::from("/x")).is_none()); } + + #[test] + fn service_cert_contains_expected_sans() { + use x509_parser::prelude::GeneralName; + + let dir = std::env::temp_dir().join(format!("numa-test-san-{}", std::process::id())); + let _ = std::fs::remove_dir_all(&dir); + let (ca_der, issuer) = ensure_ca(&dir).unwrap(); + + let names = vec!["grafana".into(), "router".into()]; + let (chain, _) = generate_service_cert(&ca_der, &issuer, "numa", &names).unwrap(); + assert_eq!(chain.len(), 2, "chain should be [leaf, CA]"); + + let (_, cert) = x509_parser::parse_x509_certificate(chain[0].as_ref()).unwrap(); + let san = cert + .tbs_certificate + .subject_alternative_name() + .unwrap() + .unwrap(); + + let dns: Vec<&str> = san + .value + .general_names + .iter() + .filter_map(|gn| match gn { + GeneralName::DNSName(s) => Some(*s), + _ => None, + }) + .collect(); + + let ips: Vec = san + .value + .general_names + .iter() + .filter_map(|gn| match gn { + GeneralName::IPAddress(b) => match b.len() { + 4 => Some(std::net::IpAddr::V4(std::net::Ipv4Addr::new( + b[0], b[1], b[2], b[3], + ))), + 16 => { + let a: [u8; 16] = (*b).try_into().unwrap(); + Some(std::net::IpAddr::V6(std::net::Ipv6Addr::from(a))) + } + _ => None, + }, + _ => None, + }) + .collect(); + + // DNS SANs + assert!(dns.contains(&"*.numa"), "missing wildcard SAN"); + assert!(dns.contains(&"grafana.numa"), "missing service SAN"); + assert!(dns.contains(&"router.numa"), "missing service SAN"); + assert!(dns.contains(&"localhost"), "missing localhost SAN"); + assert!(dns.contains(&"numa"), "missing bare TLD SAN"); + + // IP SANs + assert!( + ips.contains(&std::net::IpAddr::V4(std::net::Ipv4Addr::LOCALHOST)), + "missing 127.0.0.1 SAN" + ); + assert!( + ips.contains(&std::net::IpAddr::V6(std::net::Ipv6Addr::LOCALHOST)), + "missing ::1 SAN" + ); + + let _ = std::fs::remove_dir_all(&dir); + } } -- 2.34.1 From 305935ed9867db6e7877c81b38114a3190b9a004 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 12 Apr 2026 23:59:51 +0300 Subject: [PATCH 038/139] style: rustfmt strip_port --- src/doh.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/doh.rs b/src/doh.rs index 672402b..f90b919 100644 --- a/src/doh.rs +++ b/src/doh.rs @@ -70,9 +70,7 @@ fn strip_port(h: &str) -> Option<&str> { if base.contains(':') { return None; } - port.bytes() - .all(|b| b.is_ascii_digit()) - .then_some(base) + port.bytes().all(|b| b.is_ascii_digit()).then_some(base) } } -- 2.34.1 From 77d2c8bbcd93a2c2f2a35df28d1108659b796827 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 00:18:52 +0300 Subject: [PATCH 039/139] docs: update README comparison table, performance, and roadmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Comparison table: add DoH/DoT upstream, DoH server, request hedging, serve-stale + prefetch, conditional forwarding rows - Performance: update with current benchmark numbers (0.1ms cached, 47x NextDNS, p99 -28% vs Unbound) - Roadmap: add hedging, serve-stale, conditional forwarding, DoT upstream - Fix broken benchmarks link (bench/ → benches/) --- README.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 44b8aa4..5c632cf 100644 --- a/README.md +++ b/README.md @@ -113,14 +113,18 @@ From Machine B: `curl http://api.numa` → proxied to Machine A's port 8000. Ena | DNSSEC validation | — | — | Yes | Yes (RSA, ECDSA, Ed25519) | | Ad blocking | Yes | Yes | — | 385K+ domains | | Web admin UI | Full | Full | — | Dashboard | -| Encrypted upstream (DoH) | Needs cloudflared | Yes | — | Native | +| Encrypted upstream (DoH/DoT) | Needs cloudflared | DoH only | DoT only | DoH + DoT (`tls://`) | | Encrypted clients (DoT listener) | Needs stunnel sidecar | Yes | Yes | Native (RFC 7858) | +| DoH server endpoint | — | Yes | — | Yes (RFC 8484) | +| Request hedging | — | — | — | All protocols (UDP, DoH, DoT) | +| Serve-stale + prefetch | — | — | Prefetch at 90% TTL | RFC 8767, prefetch at 90% TTL | +| Conditional forwarding | — | Yes | Yes | Yes (per-suffix rules) | | Portable (laptop) | No (appliance) | No (appliance) | Server | Single binary, macOS/Linux/Windows | | Community maturity | 56K stars, 10 years | 33K stars | 20 years | New | ## Performance -691ns cached round-trip. ~2.0M qps throughput. Zero heap allocations in the hot path. Recursive queries average 237ms after SRTT warmup (12x improvement over round-robin). ECDSA P-256 DNSSEC verification: 174ns. [Benchmarks →](bench/) +0.1ms cached queries — matches Unbound, 47× faster than NextDNS. Wire-level cache stores raw bytes with in-place TTL patching. Request hedging eliminates p99 spikes: cold recursive p99 538ms vs Unbound 748ms (−28%), σ 4× tighter. [Benchmarks →](benches/) ## Learn More @@ -135,11 +139,14 @@ From Machine B: `curl http://api.numa` → proxied to Machine A's port 8000. Ena - [x] DNS forwarding, caching, ad blocking, developer overrides - [x] `.numa` local domains — auto TLS, path routing, WebSocket proxy - [x] LAN service discovery — mDNS, cross-machine DNS + proxy -- [x] DNS-over-HTTPS — encrypted upstream -- [x] DNS-over-TLS listener — encrypted client connections (RFC 7858, ALPN strict) +- [x] DNS-over-HTTPS — encrypted upstream + server endpoint (RFC 8484) +- [x] DNS-over-TLS — encrypted client listener (RFC 7858) + upstream forwarding (`tls://`) - [x] Recursive resolution + DNSSEC — chain-of-trust, NSEC/NSEC3 - [x] SRTT-based nameserver selection - [x] Multi-forwarder failover — multiple upstreams with SRTT ranking, fallback pool +- [x] Request hedging — parallel requests rescue packet loss and tail latency (all protocols) +- [x] Serve-stale + prefetch — RFC 8767, background refresh at <10% TTL and on stale serve +- [x] Conditional forwarding — per-suffix rules for split-horizon DNS (Tailscale, VPNs) - [x] Cache warming — proactive resolution for configured domains - [x] Mobile onboarding — `setup-phone` QR flow, mobile API, mobileconfig profiles - [ ] pkarr integration — self-sovereign DNS via Mainline DHT -- 2.34.1 From 501902d569a9cb6a837bd2c9df66c24c94df57fa Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 00:56:58 +0300 Subject: [PATCH 040/139] bench: add --vs-adguard mode for Numa vs AdGuard Home comparison MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AdGuard Home on port 5457, both forwarding via DoH. Cached queries tied at 0.1ms. On degraded networks hedging hurts p99 (28ms vs 10ms without) — both requests pay the same high RTT with no random spikes to rescue. On clean networks hedging wins. --- benches/recursive_compare.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/benches/recursive_compare.rs b/benches/recursive_compare.rs index f1a59d2..74f9576 100644 --- a/benches/recursive_compare.rs +++ b/benches/recursive_compare.rs @@ -8,6 +8,7 @@ //! --hedge-5x Hedging: single vs hedge-same vs hedge-dual vs Hickory (5 iterations) //! --vs-unbound Server-to-server: Numa vs Unbound (plain UDP, caching) //! --vs-unbound-cold Cold: Numa vs Unbound (unique subdomains, no cache hits) +//! --vs-adguard Server-to-server: Numa vs AdGuard Home (plain UDP, caching) //! --vs-nextdns Server-to-cloud: Numa (local cache) vs NextDNS (remote, 45.90.28.0) //! --vs-dot DoT server: Numa vs Unbound //! --vs-doh-servers DoH server: Numa vs Unbound (DoT upstream) @@ -158,6 +159,10 @@ fn main() { check_numa_mode(&rt, "forward"); return run_server_comparison(&rt, "dnscrypt-proxy", "127.0.0.1:5455", 5, false); } + if arg("--vs-adguard") { + check_numa_mode(&rt, "forward"); + return run_server_comparison(&rt, "AdGuard Home", "127.0.0.1:5457", 5, false); + } if arg("--vs-nextdns") { check_numa_mode(&rt, "forward"); return run_server_comparison(&rt, "NextDNS", "45.90.28.0:53", 5, false); -- 2.34.1 From 2b29a44ee0f6e6b89875b8ad24043fd46c0c60c9 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 01:02:10 +0300 Subject: [PATCH 041/139] docs: remove unfair NextDNS comparison from performance section Comparing local cache (0.8ms) vs a remote service (37ms) measures network latency, not resolver quality. Any local resolver would show the same advantage. Replaced with AdGuard Home comparison which is a fair local-to-local benchmark. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5c632cf..9979d46 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ From Machine B: `curl http://api.numa` → proxied to Machine A's port 8000. Ena ## Performance -0.1ms cached queries — matches Unbound, 47× faster than NextDNS. Wire-level cache stores raw bytes with in-place TTL patching. Request hedging eliminates p99 spikes: cold recursive p99 538ms vs Unbound 748ms (−28%), σ 4× tighter. [Benchmarks →](benches/) +0.1ms cached queries — matches Unbound and AdGuard Home. Wire-level cache stores raw bytes with in-place TTL patching. Request hedging eliminates p99 spikes: cold recursive p99 538ms vs Unbound 748ms (−28%), σ 4× tighter. [Benchmarks →](benches/) ## Learn More -- 2.34.1 From 4d4e48bbd6c78c6a1f306391c1196492109c85ad Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 01:05:20 +0300 Subject: [PATCH 042/139] chore: bump version to 0.13.0 --- Cargo.lock | 3 ++- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c0f7692..dbbd921 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1330,7 +1330,7 @@ dependencies = [ [[package]] name = "numa" -version = "0.12.0" +version = "0.13.0" dependencies = [ "arc-swap", "axum", @@ -1359,6 +1359,7 @@ dependencies = [ "toml", "tower", "webpki-roots 1.0.6", + "x509-parser", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 6ab0972..19044ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "numa" -version = "0.12.0" +version = "0.13.0" authors = ["razvandimescu "] edition = "2021" description = "Portable DNS resolver in Rust — .numa local domains, ad blocking, developer overrides, DNS-over-HTTPS" -- 2.34.1 From ca0084639337ad82dbc7997496763944c438e867 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 07:36:53 +0300 Subject: [PATCH 043/139] fix: forwarding rules override special-use NXDOMAIN for private PTR zones Explicit [[forwarding]] rules now take precedence over the RFC 6303 special-use domain intercept. Previously, PTR queries for private ranges (e.g. 168.192.in-addr.arpa) always returned local NXDOMAIN even when a forwarding rule pointed them at a corporate DNS server. Add full-pipeline resolve_query test harness (test_ctx + resolve_in_test) and two tests covering both the default behavior and the override. Closes #94 --- src/ctx.rs | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 160 insertions(+), 3 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index 65b76d3..ee88b78 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -96,7 +96,8 @@ pub async fn resolve_query( None => return Err("empty question section".into()), }; - // Pipeline: overrides -> .tld interception -> blocklist -> local zones -> cache -> upstream + // Pipeline: overrides -> .localhost -> local zones -> special-use (unless forwarded) + // -> .tld proxy -> blocklist -> cache -> forwarding -> recursive/upstream // Each lock is scoped to avoid holding MutexGuard across await points. let (response, path, dnssec) = { let override_record = ctx.overrides.read().unwrap().lookup(&qname); @@ -119,8 +120,11 @@ pub async fn resolve_query( let mut resp = DnsPacket::response_from(&query, ResultCode::NOERROR); resp.answers = records.clone(); (resp, QueryPath::Local, DnssecStatus::Indeterminate) - } else if is_special_use_domain(&qname) { - // RFC 6761/8880: private PTR, DDR, NAT64 — answer locally + } else if is_special_use_domain(&qname) + && crate::system_dns::match_forwarding_rule(&qname, &ctx.forwarding_rules).is_none() + { + // RFC 6761/8880: private PTR, DDR, NAT64 — answer locally, + // unless an explicit forwarding rule covers this zone. let resp = special_use_response(&query, &qname, qtype); (resp, QueryPath::Local, DnssecStatus::Indeterminate) } else if !ctx.proxy_tld_suffix.is_empty() @@ -655,6 +659,7 @@ mod tests { use super::*; use std::collections::HashMap; use std::net::Ipv4Addr; + use std::path::PathBuf; use std::sync::{Arc, Mutex}; use tokio::sync::broadcast; @@ -1036,4 +1041,156 @@ mod tests { "error message must be preserved for logging" ); } + + // ---- Full-pipeline resolve_query tests ---- + + async fn test_ctx() -> Arc { + let socket = UdpSocket::bind("127.0.0.1:0").await.unwrap(); + Arc::new(ServerCtx { + socket, + zone_map: HashMap::new(), + cache: RwLock::new(DnsCache::new(100, 60, 86400)), + refreshing: Mutex::new(HashSet::new()), + stats: Mutex::new(ServerStats::new()), + overrides: RwLock::new(OverrideStore::new()), + blocklist: RwLock::new(BlocklistStore::new()), + query_log: Mutex::new(QueryLog::new(100)), + services: Mutex::new(ServiceStore::new()), + lan_peers: Mutex::new(PeerStore::new(90)), + forwarding_rules: Vec::new(), + upstream_pool: Mutex::new(UpstreamPool::new( + vec![Upstream::Udp("127.0.0.1:53".parse().unwrap())], + vec![], + )), + upstream_auto: false, + upstream_port: 53, + lan_ip: Mutex::new(Ipv4Addr::LOCALHOST), + timeout: Duration::from_secs(3), + hedge_delay: Duration::ZERO, + proxy_tld: "numa".to_string(), + proxy_tld_suffix: ".numa".to_string(), + lan_enabled: false, + config_path: "/tmp/test-numa.toml".to_string(), + config_found: false, + config_dir: PathBuf::from("/tmp"), + data_dir: PathBuf::from("/tmp"), + tls_config: None, + upstream_mode: UpstreamMode::Forward, + root_hints: Vec::new(), + srtt: RwLock::new(SrttCache::new(true)), + inflight: Mutex::new(HashMap::new()), + dnssec_enabled: false, + dnssec_strict: false, + health_meta: HealthMeta::test_fixture(), + ca_pem: None, + mobile_enabled: false, + mobile_port: 8765, + }) + } + + /// Helper: send a query through the full resolve_query pipeline and return + /// the parsed response + query path. + async fn resolve_in_test( + ctx: &Arc, + domain: &str, + qtype: QueryType, + ) -> (DnsPacket, QueryPath) { + let query = DnsPacket::query(0xBEEF, domain, qtype); + let mut buf = BytePacketBuffer::new(); + query.write(&mut buf).unwrap(); + let raw = &buf.buf[..buf.pos]; + let src: SocketAddr = "127.0.0.1:1234".parse().unwrap(); + + let resp_buf = resolve_query(query, raw, src, ctx, Transport::Udp) + .await + .unwrap(); + + let log = ctx.query_log.lock().unwrap(); + let entry = log.query(&crate::query_log::QueryLogFilter { + domain: None, + query_type: None, + path: None, + since: None, + limit: Some(1), + }); + let path = entry.first().unwrap().path; + drop(log); + + let mut resp_parse_buf = BytePacketBuffer::from_bytes(resp_buf.filled()); + let resp = DnsPacket::from_buffer(&mut resp_parse_buf).unwrap(); + (resp, path) + } + + #[tokio::test] + async fn special_use_private_ptr_returns_nxdomain() { + let ctx = test_ctx().await; + let (resp, path) = + resolve_in_test(&ctx, "153.188.168.192.in-addr.arpa", QueryType::PTR).await; + assert_eq!(path, QueryPath::Local); + assert_eq!(resp.header.rescode, ResultCode::NXDOMAIN); + } + + async fn test_ctx_with_forwarding(rules: Vec) -> Arc { + let socket = UdpSocket::bind("127.0.0.1:0").await.unwrap(); + Arc::new(ServerCtx { + socket, + zone_map: HashMap::new(), + cache: RwLock::new(DnsCache::new(100, 60, 86400)), + refreshing: Mutex::new(HashSet::new()), + stats: Mutex::new(ServerStats::new()), + overrides: RwLock::new(OverrideStore::new()), + blocklist: RwLock::new(BlocklistStore::new()), + query_log: Mutex::new(QueryLog::new(100)), + services: Mutex::new(ServiceStore::new()), + lan_peers: Mutex::new(PeerStore::new(90)), + forwarding_rules: rules, + upstream_pool: Mutex::new(UpstreamPool::new( + vec![Upstream::Udp("127.0.0.1:53".parse().unwrap())], + vec![], + )), + upstream_auto: false, + upstream_port: 53, + lan_ip: Mutex::new(Ipv4Addr::LOCALHOST), + timeout: Duration::from_millis(100), + hedge_delay: Duration::ZERO, + proxy_tld: "numa".to_string(), + proxy_tld_suffix: ".numa".to_string(), + lan_enabled: false, + config_path: "/tmp/test-numa.toml".to_string(), + config_found: false, + config_dir: PathBuf::from("/tmp"), + data_dir: PathBuf::from("/tmp"), + tls_config: None, + upstream_mode: UpstreamMode::Forward, + root_hints: Vec::new(), + srtt: RwLock::new(SrttCache::new(true)), + inflight: Mutex::new(HashMap::new()), + dnssec_enabled: false, + dnssec_strict: false, + health_meta: HealthMeta::test_fixture(), + ca_pem: None, + mobile_enabled: false, + mobile_port: 8765, + }) + } + + #[tokio::test] + async fn forwarding_rule_overrides_special_use_domain() { + let rules = vec![ForwardingRule::new( + "168.192.in-addr.arpa".to_string(), + "192.168.88.1:53".parse().unwrap(), + )]; + let ctx = test_ctx_with_forwarding(rules).await; + + let (_, path) = resolve_in_test(&ctx, "153.188.168.192.in-addr.arpa", QueryType::PTR).await; + + // Should attempt forwarding, not return local NXDOMAIN. + // The forwarding will fail (no real upstream at 192.168.88.1), so we + // expect UpstreamError — but critically NOT QueryPath::Local. + assert_ne!( + path, + QueryPath::Local, + "forwarding rule must take precedence over special-use NXDOMAIN" + ); + } } -- 2.34.1 From 48f67be2f15903314e5a99da36bb2a62b457b2e7 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 07:39:55 +0300 Subject: [PATCH 044/139] refactor: deduplicate test_ctx by delegating to test_ctx_with_forwarding --- src/ctx.rs | 42 +----------------------------------------- 1 file changed, 1 insertion(+), 41 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index ee88b78..e440c2d 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -1045,47 +1045,7 @@ mod tests { // ---- Full-pipeline resolve_query tests ---- async fn test_ctx() -> Arc { - let socket = UdpSocket::bind("127.0.0.1:0").await.unwrap(); - Arc::new(ServerCtx { - socket, - zone_map: HashMap::new(), - cache: RwLock::new(DnsCache::new(100, 60, 86400)), - refreshing: Mutex::new(HashSet::new()), - stats: Mutex::new(ServerStats::new()), - overrides: RwLock::new(OverrideStore::new()), - blocklist: RwLock::new(BlocklistStore::new()), - query_log: Mutex::new(QueryLog::new(100)), - services: Mutex::new(ServiceStore::new()), - lan_peers: Mutex::new(PeerStore::new(90)), - forwarding_rules: Vec::new(), - upstream_pool: Mutex::new(UpstreamPool::new( - vec![Upstream::Udp("127.0.0.1:53".parse().unwrap())], - vec![], - )), - upstream_auto: false, - upstream_port: 53, - lan_ip: Mutex::new(Ipv4Addr::LOCALHOST), - timeout: Duration::from_secs(3), - hedge_delay: Duration::ZERO, - proxy_tld: "numa".to_string(), - proxy_tld_suffix: ".numa".to_string(), - lan_enabled: false, - config_path: "/tmp/test-numa.toml".to_string(), - config_found: false, - config_dir: PathBuf::from("/tmp"), - data_dir: PathBuf::from("/tmp"), - tls_config: None, - upstream_mode: UpstreamMode::Forward, - root_hints: Vec::new(), - srtt: RwLock::new(SrttCache::new(true)), - inflight: Mutex::new(HashMap::new()), - dnssec_enabled: false, - dnssec_strict: false, - health_meta: HealthMeta::test_fixture(), - ca_pem: None, - mobile_enabled: false, - mobile_port: 8765, - }) + test_ctx_with_forwarding(Vec::new()).await } /// Helper: send a query through the full resolve_query pipeline and return -- 2.34.1 From b8ddc16027453beec62888e9ee062b105f362543 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 07:51:14 +0300 Subject: [PATCH 045/139] refactor: return QueryPath from resolve_query, add mock upstream to tests resolve_query now returns (BytePacketBuffer, QueryPath) so callers and tests can inspect the resolution path without reading the query log. Production call sites (UDP, DoT, DoH) destructure and ignore it. The forwarding test now uses a mock UDP upstream that replies with a canned response, asserting QueryPath::Forwarded instead of != Local. --- src/ctx.rs | 57 ++++++++++++++++++++++++++++++++---------------------- src/doh.rs | 2 +- src/dot.rs | 2 +- 3 files changed, 36 insertions(+), 25 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index e440c2d..3f1370a 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -88,7 +88,7 @@ pub async fn resolve_query( src_addr: SocketAddr, ctx: &Arc, transport: Transport, -) -> crate::Result { +) -> crate::Result<(BytePacketBuffer, QueryPath)> { let start = Instant::now(); let (qname, qtype) = match query.questions.first() { @@ -377,7 +377,7 @@ pub async fn resolve_query( dnssec, }); - Ok(resp_buffer) + Ok((resp_buffer, path)) } fn cache_and_parse( @@ -461,7 +461,7 @@ pub async fn handle_query( } }; match resolve_query(query, &buffer.buf[..raw_len], src_addr, ctx, transport).await { - Ok(resp_buffer) => { + Ok((resp_buffer, _)) => { ctx.socket.send_to(resp_buffer.filled(), src_addr).await?; } Err(e) => { @@ -1048,7 +1048,7 @@ mod tests { test_ctx_with_forwarding(Vec::new()).await } - /// Helper: send a query through the full resolve_query pipeline and return + /// Send a query through the full resolve_query pipeline and return /// the parsed response + query path. async fn resolve_in_test( ctx: &Arc, @@ -1061,21 +1061,10 @@ mod tests { let raw = &buf.buf[..buf.pos]; let src: SocketAddr = "127.0.0.1:1234".parse().unwrap(); - let resp_buf = resolve_query(query, raw, src, ctx, Transport::Udp) + let (resp_buf, path) = resolve_query(query, raw, src, ctx, Transport::Udp) .await .unwrap(); - let log = ctx.query_log.lock().unwrap(); - let entry = log.query(&crate::query_log::QueryLogFilter { - domain: None, - query_type: None, - path: None, - since: None, - limit: Some(1), - }); - let path = entry.first().unwrap().path; - drop(log); - let mut resp_parse_buf = BytePacketBuffer::from_bytes(resp_buf.filled()); let resp = DnsPacket::from_buffer(&mut resp_parse_buf).unwrap(); (resp, path) @@ -1134,23 +1123,45 @@ mod tests { }) } + /// Spawn a UDP socket that replies to the first DNS query with the given + /// response packet (patching the query ID). Returns the socket address. + async fn mock_upstream(response: DnsPacket) -> SocketAddr { + let sock = UdpSocket::bind("127.0.0.1:0").await.unwrap(); + let addr = sock.local_addr().unwrap(); + tokio::spawn(async move { + let mut buf = [0u8; 512]; + let (_, src) = sock.recv_from(&mut buf).await.unwrap(); + let query_id = u16::from_be_bytes([buf[0], buf[1]]); + let mut resp = response; + resp.header.id = query_id; + let mut out = BytePacketBuffer::new(); + resp.write(&mut out).unwrap(); + sock.send_to(out.filled(), src).await.unwrap(); + }); + addr + } + #[tokio::test] async fn forwarding_rule_overrides_special_use_domain() { + let mut resp = DnsPacket::new(); + resp.header.response = true; + resp.header.rescode = ResultCode::NOERROR; + let upstream_addr = mock_upstream(resp).await; + let rules = vec![ForwardingRule::new( "168.192.in-addr.arpa".to_string(), - "192.168.88.1:53".parse().unwrap(), + upstream_addr, )]; let ctx = test_ctx_with_forwarding(rules).await; - let (_, path) = resolve_in_test(&ctx, "153.188.168.192.in-addr.arpa", QueryType::PTR).await; + let (resp, path) = + resolve_in_test(&ctx, "153.188.168.192.in-addr.arpa", QueryType::PTR).await; - // Should attempt forwarding, not return local NXDOMAIN. - // The forwarding will fail (no real upstream at 192.168.88.1), so we - // expect UpstreamError — but critically NOT QueryPath::Local. - assert_ne!( + assert_eq!( path, - QueryPath::Local, + QueryPath::Forwarded, "forwarding rule must take precedence over special-use NXDOMAIN" ); + assert_eq!(resp.header.rescode, ResultCode::NOERROR); } } diff --git a/src/doh.rs b/src/doh.rs index f90b919..900edb4 100644 --- a/src/doh.rs +++ b/src/doh.rs @@ -113,7 +113,7 @@ async fn resolve_doh( let questions = query.questions.clone(); match resolve_query(query, dns_bytes, src, ctx, Transport::Doh).await { - Ok(resp_buffer) => { + Ok((resp_buffer, _)) => { let min_ttl = extract_min_ttl(resp_buffer.filled()); dns_response(resp_buffer.filled(), min_ttl) } diff --git a/src/dot.rs b/src/dot.rs index e883e0b..db8257d 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -211,7 +211,7 @@ async fn handle_dot_connection( ) .await { - Ok(resp_buffer) => { + Ok((resp_buffer, _)) => { if write_framed(&mut stream, resp_buffer.filled()) .await .is_err() -- 2.34.1 From b40004fe5e41dc7800d25cbec5e49347a6e68674 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 07:56:47 +0300 Subject: [PATCH 046/139] refactor: extract shared test infrastructure into testutil module - test_ctx(): single ServerCtx builder, replaces 3 copies (ctx/api/dot) - mock_upstream(): canned DNS response server for forwarding tests - blackhole_upstream(): unresponsive socket for timeout tests - Removes ~100 lines of duplicated 30-field struct literals --- src/api.rs | 45 +---------------------- src/ctx.rs | 76 +++------------------------------------ src/dot.rs | 82 +++++++++++++----------------------------- src/lib.rs | 3 ++ src/testutil.rs | 95 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 128 insertions(+), 173 deletions(-) create mode 100644 src/testutil.rs diff --git a/src/api.rs b/src/api.rs index fcc0bd9..6ec3e48 100644 --- a/src/api.rs +++ b/src/api.rs @@ -1020,53 +1020,10 @@ mod tests { use super::*; use axum::body::Body; use http::Request; - use std::sync::{Mutex, RwLock}; use tower::ServiceExt; async fn test_ctx() -> Arc { - let socket = tokio::net::UdpSocket::bind("127.0.0.1:0").await.unwrap(); - Arc::new(ServerCtx { - socket, - zone_map: std::collections::HashMap::new(), - cache: RwLock::new(crate::cache::DnsCache::new(100, 60, 86400)), - refreshing: Mutex::new(std::collections::HashSet::new()), - stats: Mutex::new(crate::stats::ServerStats::new()), - overrides: RwLock::new(crate::override_store::OverrideStore::new()), - blocklist: RwLock::new(crate::blocklist::BlocklistStore::new()), - query_log: Mutex::new(crate::query_log::QueryLog::new(100)), - services: Mutex::new(crate::service_store::ServiceStore::new()), - lan_peers: Mutex::new(crate::lan::PeerStore::new(90)), - forwarding_rules: Vec::new(), - upstream_pool: Mutex::new(crate::forward::UpstreamPool::new( - vec![crate::forward::Upstream::Udp( - "127.0.0.1:53".parse().unwrap(), - )], - vec![], - )), - upstream_auto: false, - upstream_port: 53, - lan_ip: Mutex::new(std::net::Ipv4Addr::LOCALHOST), - timeout: std::time::Duration::from_secs(3), - hedge_delay: std::time::Duration::ZERO, - proxy_tld: "numa".to_string(), - proxy_tld_suffix: ".numa".to_string(), - lan_enabled: false, - config_path: "/tmp/test-numa.toml".to_string(), - config_found: false, - config_dir: std::path::PathBuf::from("/tmp"), - data_dir: std::path::PathBuf::from("/tmp"), - tls_config: None, - upstream_mode: crate::config::UpstreamMode::Forward, - root_hints: Vec::new(), - srtt: RwLock::new(crate::srtt::SrttCache::new(true)), - inflight: Mutex::new(std::collections::HashMap::new()), - dnssec_enabled: false, - dnssec_strict: false, - health_meta: crate::health::HealthMeta::test_fixture(), - ca_pem: None, - mobile_enabled: false, - mobile_port: 8765, - }) + Arc::new(crate::testutil::test_ctx().await) } #[tokio::test] diff --git a/src/ctx.rs b/src/ctx.rs index 3f1370a..475dfe7 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -659,7 +659,6 @@ mod tests { use super::*; use std::collections::HashMap; use std::net::Ipv4Addr; - use std::path::PathBuf; use std::sync::{Arc, Mutex}; use tokio::sync::broadcast; @@ -1044,10 +1043,6 @@ mod tests { // ---- Full-pipeline resolve_query tests ---- - async fn test_ctx() -> Arc { - test_ctx_with_forwarding(Vec::new()).await - } - /// Send a query through the full resolve_query pipeline and return /// the parsed response + query path. async fn resolve_in_test( @@ -1072,87 +1067,26 @@ mod tests { #[tokio::test] async fn special_use_private_ptr_returns_nxdomain() { - let ctx = test_ctx().await; + let ctx = Arc::new(crate::testutil::test_ctx().await); let (resp, path) = resolve_in_test(&ctx, "153.188.168.192.in-addr.arpa", QueryType::PTR).await; assert_eq!(path, QueryPath::Local); assert_eq!(resp.header.rescode, ResultCode::NXDOMAIN); } - async fn test_ctx_with_forwarding(rules: Vec) -> Arc { - let socket = UdpSocket::bind("127.0.0.1:0").await.unwrap(); - Arc::new(ServerCtx { - socket, - zone_map: HashMap::new(), - cache: RwLock::new(DnsCache::new(100, 60, 86400)), - refreshing: Mutex::new(HashSet::new()), - stats: Mutex::new(ServerStats::new()), - overrides: RwLock::new(OverrideStore::new()), - blocklist: RwLock::new(BlocklistStore::new()), - query_log: Mutex::new(QueryLog::new(100)), - services: Mutex::new(ServiceStore::new()), - lan_peers: Mutex::new(PeerStore::new(90)), - forwarding_rules: rules, - upstream_pool: Mutex::new(UpstreamPool::new( - vec![Upstream::Udp("127.0.0.1:53".parse().unwrap())], - vec![], - )), - upstream_auto: false, - upstream_port: 53, - lan_ip: Mutex::new(Ipv4Addr::LOCALHOST), - timeout: Duration::from_millis(100), - hedge_delay: Duration::ZERO, - proxy_tld: "numa".to_string(), - proxy_tld_suffix: ".numa".to_string(), - lan_enabled: false, - config_path: "/tmp/test-numa.toml".to_string(), - config_found: false, - config_dir: PathBuf::from("/tmp"), - data_dir: PathBuf::from("/tmp"), - tls_config: None, - upstream_mode: UpstreamMode::Forward, - root_hints: Vec::new(), - srtt: RwLock::new(SrttCache::new(true)), - inflight: Mutex::new(HashMap::new()), - dnssec_enabled: false, - dnssec_strict: false, - health_meta: HealthMeta::test_fixture(), - ca_pem: None, - mobile_enabled: false, - mobile_port: 8765, - }) - } - - /// Spawn a UDP socket that replies to the first DNS query with the given - /// response packet (patching the query ID). Returns the socket address. - async fn mock_upstream(response: DnsPacket) -> SocketAddr { - let sock = UdpSocket::bind("127.0.0.1:0").await.unwrap(); - let addr = sock.local_addr().unwrap(); - tokio::spawn(async move { - let mut buf = [0u8; 512]; - let (_, src) = sock.recv_from(&mut buf).await.unwrap(); - let query_id = u16::from_be_bytes([buf[0], buf[1]]); - let mut resp = response; - resp.header.id = query_id; - let mut out = BytePacketBuffer::new(); - resp.write(&mut out).unwrap(); - sock.send_to(out.filled(), src).await.unwrap(); - }); - addr - } - #[tokio::test] async fn forwarding_rule_overrides_special_use_domain() { let mut resp = DnsPacket::new(); resp.header.response = true; resp.header.rescode = ResultCode::NOERROR; - let upstream_addr = mock_upstream(resp).await; + let upstream_addr = crate::testutil::mock_upstream(resp).await; - let rules = vec![ForwardingRule::new( + let mut ctx = crate::testutil::test_ctx().await; + ctx.forwarding_rules = vec![ForwardingRule::new( "168.192.in-addr.arpa".to_string(), upstream_addr, )]; - let ctx = test_ctx_with_forwarding(rules).await; + let ctx = Arc::new(ctx); let (resp, path) = resolve_in_test(&ctx, "153.188.168.192.in-addr.arpa", QueryType::PTR).await; diff --git a/src/dot.rs b/src/dot.rs index db8257d..b39d7fe 100644 --- a/src/dot.rs +++ b/src/dot.rs @@ -279,7 +279,7 @@ where mod tests { use super::*; use std::collections::HashMap; - use std::sync::{Mutex, RwLock}; + use std::sync::Mutex; use rcgen::{CertificateParams, DnType, KeyPair}; use rustls::pki_types::{CertificateDer, PrivateKeyDer, PrivatePkcs8KeyDer, ServerName}; @@ -344,63 +344,29 @@ mod tests { async fn spawn_dot_server() -> (SocketAddr, CertificateDer<'static>) { let (server_tls, cert_der) = test_tls_configs(); - let socket = tokio::net::UdpSocket::bind("127.0.0.1:0").await.unwrap(); - // Bind an unresponsive upstream and leak it so it lives for the test duration. - let blackhole = Box::leak(Box::new(std::net::UdpSocket::bind("127.0.0.1:0").unwrap())); - let upstream_addr = blackhole.local_addr().unwrap(); - let ctx = Arc::new(ServerCtx { - socket, - zone_map: { - let mut m = HashMap::new(); - let mut inner = HashMap::new(); - inner.insert( - QueryType::A, - vec![DnsRecord::A { - domain: "dot-test.example".to_string(), - addr: std::net::Ipv4Addr::new(10, 0, 0, 1), - ttl: 300, - }], - ); - m.insert("dot-test.example".to_string(), inner); - m - }, - cache: RwLock::new(crate::cache::DnsCache::new(100, 60, 86400)), - refreshing: Mutex::new(std::collections::HashSet::new()), - stats: Mutex::new(crate::stats::ServerStats::new()), - overrides: RwLock::new(crate::override_store::OverrideStore::new()), - blocklist: RwLock::new(crate::blocklist::BlocklistStore::new()), - query_log: Mutex::new(crate::query_log::QueryLog::new(100)), - services: Mutex::new(crate::service_store::ServiceStore::new()), - lan_peers: Mutex::new(crate::lan::PeerStore::new(90)), - forwarding_rules: Vec::new(), - upstream_pool: Mutex::new(crate::forward::UpstreamPool::new( - vec![crate::forward::Upstream::Udp(upstream_addr)], - vec![], - )), - upstream_auto: false, - upstream_port: 53, - lan_ip: Mutex::new(std::net::Ipv4Addr::LOCALHOST), - timeout: Duration::from_millis(200), - hedge_delay: Duration::ZERO, - proxy_tld: "numa".to_string(), - proxy_tld_suffix: ".numa".to_string(), - lan_enabled: false, - config_path: String::new(), - config_found: false, - config_dir: std::path::PathBuf::from("/tmp"), - data_dir: std::path::PathBuf::from("/tmp"), - tls_config: Some(arc_swap::ArcSwap::from(server_tls)), - upstream_mode: crate::config::UpstreamMode::Forward, - root_hints: Vec::new(), - srtt: RwLock::new(crate::srtt::SrttCache::new(true)), - inflight: Mutex::new(HashMap::new()), - dnssec_enabled: false, - dnssec_strict: false, - health_meta: crate::health::HealthMeta::test_fixture(), - ca_pem: None, - mobile_enabled: false, - mobile_port: 8765, - }); + let upstream_addr = crate::testutil::blackhole_upstream(); + + let mut ctx = crate::testutil::test_ctx().await; + ctx.zone_map = { + let mut m = HashMap::new(); + let mut inner = HashMap::new(); + inner.insert( + QueryType::A, + vec![DnsRecord::A { + domain: "dot-test.example".to_string(), + addr: std::net::Ipv4Addr::new(10, 0, 0, 1), + ttl: 300, + }], + ); + m.insert("dot-test.example".to_string(), inner); + m + }; + ctx.upstream_pool = Mutex::new(crate::forward::UpstreamPool::new( + vec![crate::forward::Upstream::Udp(upstream_addr)], + vec![], + )); + ctx.tls_config = Some(arc_swap::ArcSwap::from(server_tls)); + let ctx = Arc::new(ctx); let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let addr = listener.local_addr().unwrap(); diff --git a/src/lib.rs b/src/lib.rs index 92a0b00..8933e2a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,6 +28,9 @@ pub mod system_dns; pub mod tls; pub mod wire; +#[cfg(test)] +pub(crate) mod testutil; + pub type Error = Box; pub type Result = std::result::Result; diff --git a/src/testutil.rs b/src/testutil.rs new file mode 100644 index 0000000..8687625 --- /dev/null +++ b/src/testutil.rs @@ -0,0 +1,95 @@ +use std::collections::{HashMap, HashSet}; +use std::net::{Ipv4Addr, SocketAddr}; +use std::path::PathBuf; +use std::sync::{Mutex, RwLock}; +use std::time::Duration; + +use tokio::net::UdpSocket; + +use crate::blocklist::BlocklistStore; +use crate::buffer::BytePacketBuffer; +use crate::cache::DnsCache; +use crate::config::UpstreamMode; +use crate::ctx::ServerCtx; +use crate::forward::{Upstream, UpstreamPool}; +use crate::health::HealthMeta; +use crate::lan::PeerStore; +use crate::override_store::OverrideStore; +use crate::packet::DnsPacket; +use crate::query_log::QueryLog; +use crate::service_store::ServiceStore; +use crate::srtt::SrttCache; +use crate::stats::ServerStats; +/// Minimal `ServerCtx` for tests. Override fields after construction +/// (all fields are `pub`), then wrap in `Arc`. +pub async fn test_ctx() -> ServerCtx { + let socket = UdpSocket::bind("127.0.0.1:0").await.unwrap(); + ServerCtx { + socket, + zone_map: HashMap::new(), + cache: RwLock::new(DnsCache::new(100, 60, 86400)), + refreshing: Mutex::new(HashSet::new()), + stats: Mutex::new(ServerStats::new()), + overrides: RwLock::new(OverrideStore::new()), + blocklist: RwLock::new(BlocklistStore::new()), + query_log: Mutex::new(QueryLog::new(100)), + services: Mutex::new(ServiceStore::new()), + lan_peers: Mutex::new(PeerStore::new(90)), + forwarding_rules: Vec::new(), + upstream_pool: Mutex::new(UpstreamPool::new( + vec![Upstream::Udp("127.0.0.1:53".parse().unwrap())], + vec![], + )), + upstream_auto: false, + upstream_port: 53, + lan_ip: Mutex::new(Ipv4Addr::LOCALHOST), + timeout: Duration::from_millis(200), + hedge_delay: Duration::ZERO, + proxy_tld: "numa".to_string(), + proxy_tld_suffix: ".numa".to_string(), + lan_enabled: false, + config_path: "/tmp/test-numa.toml".to_string(), + config_found: false, + config_dir: PathBuf::from("/tmp"), + data_dir: PathBuf::from("/tmp"), + tls_config: None, + upstream_mode: UpstreamMode::Forward, + root_hints: Vec::new(), + srtt: RwLock::new(SrttCache::new(true)), + inflight: Mutex::new(HashMap::new()), + dnssec_enabled: false, + dnssec_strict: false, + health_meta: HealthMeta::test_fixture(), + ca_pem: None, + mobile_enabled: false, + mobile_port: 8765, + } +} + +/// Spawn a UDP socket that replies to the first DNS query with the given +/// response packet (patching the query ID to match). Returns the socket address. +pub async fn mock_upstream(response: DnsPacket) -> SocketAddr { + let sock = UdpSocket::bind("127.0.0.1:0").await.unwrap(); + let addr = sock.local_addr().unwrap(); + tokio::spawn(async move { + let mut buf = [0u8; 512]; + let (_, src) = sock.recv_from(&mut buf).await.unwrap(); + let query_id = u16::from_be_bytes([buf[0], buf[1]]); + let mut resp = response; + resp.header.id = query_id; + let mut out = BytePacketBuffer::new(); + resp.write(&mut out).unwrap(); + sock.send_to(out.filled(), src).await.unwrap(); + }); + addr +} + +/// UDP socket that accepts connections but never replies. +/// Useful as an upstream that triggers timeouts. +pub fn blackhole_upstream() -> SocketAddr { + let sock = std::net::UdpSocket::bind("127.0.0.1:0").unwrap(); + let addr = sock.local_addr().unwrap(); + // Leak so it stays bound for the duration of the test process. + Box::leak(Box::new(sock)); + addr +} -- 2.34.1 From 155c1c4da0f1fcd7f27c835939967a87ecebcae5 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 08:04:59 +0300 Subject: [PATCH 047/139] test: full-pipeline coverage for every resolve_query step Test each pipeline stage in isolation through resolve_query: - override takes precedence over all other paths - localhost and *.localhost resolve to loopback - local zone returns configured records - .tld proxy resolves registered services to loopback - blocklist sinkholes to 0.0.0.0 - cache hit returns stored response without upstream --- src/ctx.rs | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/src/ctx.rs b/src/ctx.rs index 475dfe7..460b0eb 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -1098,4 +1098,125 @@ mod tests { ); assert_eq!(resp.header.rescode, ResultCode::NOERROR); } + + #[tokio::test] + async fn pipeline_override_takes_precedence() { + let ctx = crate::testutil::test_ctx().await; + ctx.overrides + .write() + .unwrap() + .insert("override.test", "1.2.3.4", 60, None) + .unwrap(); + let ctx = Arc::new(ctx); + + let (resp, path) = resolve_in_test(&ctx, "override.test", QueryType::A).await; + assert_eq!(path, QueryPath::Overridden); + assert_eq!(resp.header.rescode, ResultCode::NOERROR); + assert_eq!(resp.answers.len(), 1); + } + + #[tokio::test] + async fn pipeline_localhost_resolves_to_loopback() { + let ctx = Arc::new(crate::testutil::test_ctx().await); + + let (resp, path) = resolve_in_test(&ctx, "localhost", QueryType::A).await; + assert_eq!(path, QueryPath::Local); + assert_eq!(resp.header.rescode, ResultCode::NOERROR); + match &resp.answers[0] { + DnsRecord::A { addr, .. } => assert_eq!(*addr, Ipv4Addr::LOCALHOST), + other => panic!("expected A record, got {:?}", other), + } + } + + #[tokio::test] + async fn pipeline_localhost_subdomain_resolves_to_loopback() { + let ctx = Arc::new(crate::testutil::test_ctx().await); + + let (resp, path) = resolve_in_test(&ctx, "app.localhost", QueryType::A).await; + assert_eq!(path, QueryPath::Local); + assert_eq!(resp.header.rescode, ResultCode::NOERROR); + } + + #[tokio::test] + async fn pipeline_local_zone_returns_configured_record() { + let mut ctx = crate::testutil::test_ctx().await; + let mut inner = HashMap::new(); + inner.insert( + QueryType::A, + vec![DnsRecord::A { + domain: "myapp.test".to_string(), + addr: Ipv4Addr::new(10, 0, 0, 42), + ttl: 300, + }], + ); + ctx.zone_map.insert("myapp.test".to_string(), inner); + let ctx = Arc::new(ctx); + + let (resp, path) = resolve_in_test(&ctx, "myapp.test", QueryType::A).await; + assert_eq!(path, QueryPath::Local); + assert_eq!(resp.header.rescode, ResultCode::NOERROR); + match &resp.answers[0] { + DnsRecord::A { addr, .. } => assert_eq!(*addr, Ipv4Addr::new(10, 0, 0, 42)), + other => panic!("expected A record, got {:?}", other), + } + } + + #[tokio::test] + async fn pipeline_tld_proxy_resolves_service() { + let ctx = crate::testutil::test_ctx().await; + ctx.services.lock().unwrap().insert("grafana", 3000); + let ctx = Arc::new(ctx); + + let (resp, path) = resolve_in_test(&ctx, "grafana.numa", QueryType::A).await; + assert_eq!(path, QueryPath::Local); + assert_eq!(resp.header.rescode, ResultCode::NOERROR); + match &resp.answers[0] { + DnsRecord::A { addr, .. } => assert_eq!(*addr, Ipv4Addr::LOCALHOST), + other => panic!("expected A record, got {:?}", other), + } + } + + #[tokio::test] + async fn pipeline_blocklist_sinkhole() { + let ctx = crate::testutil::test_ctx().await; + let mut domains = std::collections::HashSet::new(); + domains.insert("ads.tracker.test".to_string()); + ctx.blocklist.write().unwrap().swap_domains(domains, vec![]); + let ctx = Arc::new(ctx); + + let (resp, path) = resolve_in_test(&ctx, "ads.tracker.test", QueryType::A).await; + assert_eq!(path, QueryPath::Blocked); + assert_eq!(resp.header.rescode, ResultCode::NOERROR); + match &resp.answers[0] { + DnsRecord::A { addr, .. } => assert_eq!(*addr, Ipv4Addr::UNSPECIFIED), + other => panic!("expected sinkhole A record, got {:?}", other), + } + } + + #[tokio::test] + async fn pipeline_cache_hit() { + let ctx = Arc::new(crate::testutil::test_ctx().await); + + // Pre-populate cache with a response + let mut pkt = DnsPacket::new(); + pkt.header.response = true; + pkt.header.rescode = ResultCode::NOERROR; + pkt.questions.push(crate::question::DnsQuestion { + name: "cached.test".to_string(), + qtype: QueryType::A, + }); + pkt.answers.push(DnsRecord::A { + domain: "cached.test".to_string(), + addr: Ipv4Addr::new(5, 5, 5, 5), + ttl: 3600, + }); + ctx.cache + .write() + .unwrap() + .insert("cached.test", QueryType::A, &pkt); + + let (resp, path) = resolve_in_test(&ctx, "cached.test", QueryType::A).await; + assert_eq!(path, QueryPath::Cached); + assert_eq!(resp.header.rescode, ResultCode::NOERROR); + } } -- 2.34.1 From 0bdde40f4094fd298b2a2db7bcf74064451982b6 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 08:07:58 +0300 Subject: [PATCH 048/139] test: verify forwarded response content from mock upstream --- src/ctx.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/ctx.rs b/src/ctx.rs index 460b0eb..4e5d938 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -1219,4 +1219,33 @@ mod tests { assert_eq!(path, QueryPath::Cached); assert_eq!(resp.header.rescode, ResultCode::NOERROR); } + + #[tokio::test] + async fn pipeline_forwarding_returns_upstream_answer() { + let mut upstream_resp = DnsPacket::new(); + upstream_resp.header.response = true; + upstream_resp.header.rescode = ResultCode::NOERROR; + upstream_resp.answers.push(DnsRecord::A { + domain: "internal.corp".to_string(), + addr: Ipv4Addr::new(10, 1, 2, 3), + ttl: 600, + }); + let upstream_addr = crate::testutil::mock_upstream(upstream_resp).await; + + let mut ctx = crate::testutil::test_ctx().await; + ctx.forwarding_rules = vec![ForwardingRule::new("corp".to_string(), upstream_addr)]; + let ctx = Arc::new(ctx); + + let (resp, path) = resolve_in_test(&ctx, "internal.corp", QueryType::A).await; + assert_eq!(path, QueryPath::Forwarded); + assert_eq!(resp.header.rescode, ResultCode::NOERROR); + assert_eq!(resp.answers.len(), 1); + match &resp.answers[0] { + DnsRecord::A { domain, addr, .. } => { + assert_eq!(domain, "internal.corp"); + assert_eq!(*addr, Ipv4Addr::new(10, 1, 2, 3)); + } + other => panic!("expected A record, got {:?}", other), + } + } } -- 2.34.1 From d3f046da4cab44e8c6201003344336b46d81eb06 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 08:10:26 +0300 Subject: [PATCH 049/139] style: assert loopback addr in subdomain test, trim verbose comment --- src/ctx.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index 4e5d938..2812bed 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -123,8 +123,7 @@ pub async fn resolve_query( } else if is_special_use_domain(&qname) && crate::system_dns::match_forwarding_rule(&qname, &ctx.forwarding_rules).is_none() { - // RFC 6761/8880: private PTR, DDR, NAT64 — answer locally, - // unless an explicit forwarding rule covers this zone. + // RFC 6761/8880: answer locally unless a forwarding rule covers this zone. let resp = special_use_response(&query, &qname, qtype); (resp, QueryPath::Local, DnssecStatus::Indeterminate) } else if !ctx.proxy_tld_suffix.is_empty() @@ -1135,6 +1134,10 @@ mod tests { let (resp, path) = resolve_in_test(&ctx, "app.localhost", QueryType::A).await; assert_eq!(path, QueryPath::Local); assert_eq!(resp.header.rescode, ResultCode::NOERROR); + match &resp.answers[0] { + DnsRecord::A { addr, .. } => assert_eq!(*addr, Ipv4Addr::LOCALHOST), + other => panic!("expected A record, got {:?}", other), + } } #[tokio::test] -- 2.34.1 From 6b0a30d004c2eeafc8839a3dd3145fc4c1dde0ad Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 13:49:40 +0300 Subject: [PATCH 050/139] blog: add fixing DoH tail latency post + blog infrastructure New post on reqwest HTTP/2 window tuning and request hedging (Dean & Barroso's "The Tail at Scale" applied to DNS forwarding). Covers DoH forwarding p99 improvement and cold recursive resolution from 2.3s to 538ms. Also adds blog build infrastructure: index generation script, draft preview server, hero metrics/before-after CSS, and normalizes date format across existing posts. --- .gitignore | 2 + Makefile | 13 ++ blog/dns-from-scratch.md | 2 +- blog/dnssec-from-scratch.md | 2 +- blog/dot-from-scratch.md | 2 +- blog/fixing-doh-tail-latency.md | 169 ++++++++++++++++++++++ scripts/generate-blog-index.sh | 239 ++++++++++++++++++++++++++++++++ scripts/serve-site.sh | 14 ++ site/blog-template.html | 96 +++++++++++++ site/blog/index.html | 11 +- 10 files changed, 545 insertions(+), 5 deletions(-) create mode 100644 blog/fixing-doh-tail-latency.md create mode 100755 scripts/generate-blog-index.sh create mode 100755 scripts/serve-site.sh diff --git a/.gitignore b/.gitignore index 649d86b..acfc601 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ CLAUDE.md docs/ site/blog/posts/ ios/ +drafts/ +site/blog/index.html diff --git a/Makefile b/Makefile index f84761a..dbff53a 100644 --- a/Makefile +++ b/Makefile @@ -32,6 +32,19 @@ blog: pandoc "$$f" --template=site/blog-template.html -o "site/blog/posts/$$name.html"; \ echo " $$f → site/blog/posts/$$name.html"; \ done + @scripts/generate-blog-index.sh + +blog-drafts: blog + @if [ -d drafts ] && ls drafts/*.md >/dev/null 2>&1; then \ + for f in drafts/*.md; do \ + name=$$(basename "$$f" .md); \ + pandoc "$$f" --template=site/blog-template.html -o "site/blog/posts/$$name.html"; \ + echo " $$f → site/blog/posts/$$name.html (draft)"; \ + done; \ + BLOG_INCLUDE_DRAFTS=1 scripts/generate-blog-index.sh; \ + else \ + echo " No drafts found"; \ + fi release: ifndef VERSION diff --git a/blog/dns-from-scratch.md b/blog/dns-from-scratch.md index 7bf666c..c626f8a 100644 --- a/blog/dns-from-scratch.md +++ b/blog/dns-from-scratch.md @@ -1,7 +1,7 @@ --- title: I Built a DNS Resolver from Scratch in Rust description: How DNS actually works at the wire level — label compression, TTL tricks, DoH, and what surprised me building a resolver with zero DNS libraries. -date: March 2026 +date: 2026-03-20 --- I wanted to understand how DNS actually works. Not the "it translates domain names to IP addresses" explanation — the actual bytes on the wire. What does a DNS packet look like? How does label compression work? Why is everything crammed into 512 bytes? diff --git a/blog/dnssec-from-scratch.md b/blog/dnssec-from-scratch.md index 01bc5c5..804b425 100644 --- a/blog/dnssec-from-scratch.md +++ b/blog/dnssec-from-scratch.md @@ -1,7 +1,7 @@ --- title: Implementing DNSSEC from Scratch in Rust description: Recursive resolution from root hints, chain-of-trust validation, NSEC/NSEC3 denial proofs, and what I learned implementing DNSSEC with zero DNS libraries. -date: March 2026 +date: 2026-03-28 --- In the [previous post](/blog/posts/dns-from-scratch.html) I covered how DNS works at the wire level — packet format, label compression, TTL caching, DoH. Numa was a forwarding resolver: it parsed packets, did useful things locally, and relayed the rest to Cloudflare or Quad9. diff --git a/blog/dot-from-scratch.md b/blog/dot-from-scratch.md index 448f185..859202d 100644 --- a/blog/dot-from-scratch.md +++ b/blog/dot-from-scratch.md @@ -1,7 +1,7 @@ --- title: DNS-over-TLS from Scratch in Rust description: Building RFC 7858 on top of rustls — length-prefix framing, ALPN cross-protocol defense, and two bugs that only the strict clients caught. -date: April 2026 +date: 2026-04-06 --- The [previous post](/blog/posts/dnssec-from-scratch.html) ended with "DoT — the last encrypted transport we don't support." This post is about building it. diff --git a/blog/fixing-doh-tail-latency.md b/blog/fixing-doh-tail-latency.md new file mode 100644 index 0000000..661c456 --- /dev/null +++ b/blog/fixing-doh-tail-latency.md @@ -0,0 +1,169 @@ +--- +title: Fixing DNS tail latency with a 5-line config and a 50-line function +description: We had periodic 40-140ms DoH spikes from hyper's dispatch channel. The fix was reqwest window tuning and request hedging — Dean & Barroso's "The Tail at Scale," applied to a DNS forwarder. Same ideas took our cold recursive p99 from 2.3 seconds to 538ms. +date: 2026-04-12 +--- + +Numa forwards DNS queries over HTTPS using reqwest. When we benchmarked the DoH path, we found periodic 40-140ms latency spikes every ~100ms of wall clock, in an otherwise ~10ms distribution. The tail was dragging our average — median 10ms, mean 23ms. + +
+
+
DoH forwarding p99
+
113 → 71ms
+
window tuning + request hedging
+
+
+
Cold recursive p99
+
2.3s → 538ms
+
NS caching, serve-stale, parallel queries
+
+
+
Forwarding σ
+
31 → 13ms
+
random spikes become parallel races
+
+
+ +The fix was a 5-line reqwest config and a 50-line hedging function. This post is also an advertisement for Dean & Barroso's 2013 paper ["The Tail at Scale"](https://research.google/pubs/pub40801/) — a decade-old idea that still demolishes dispatch spikes. + +--- + +## The cause: hyper's dispatch channel + +Reqwest sits on top of hyper, which interposes an mpsc dispatch channel and a separate `ClientTask` between `.send()` and the h2 stream. We instrumented the forwarding path and confirmed: 100% of the spike time lives in the `send()` phase, and a parallel heartbeat task showed zero runtime lag during spikes. The tokio runtime was fine — the stall was internal to hyper's request scheduling. + +Hickory-resolver doesn't have this issue. It holds `h2::SendRequest` directly and calls `ready().await; send_request()` in the caller's task — no channel, no scheduling dependency. We used it as a reference point throughout. + +## Fix #1 — HTTP/2 window sizes + +Reqwest inherits hyper's HTTP/2 defaults: 2 MB stream window, 5 MB connection window. For DNS responses (~200 bytes), that's ~10,000× oversized — unnecessary WINDOW_UPDATE frames, bloated bookkeeping on every poll, and different server-side scheduling behavior. + +Setting both windows to the h2 spec default (64 KB) dropped our median from 13.3ms to 10.1ms: + +```rust +reqwest::Client::builder() + .use_rustls_tls() + .http2_initial_stream_window_size(65_535) + .http2_initial_connection_window_size(65_535) + .http2_keep_alive_interval(Duration::from_secs(15)) + .http2_keep_alive_while_idle(true) + .http2_keep_alive_timeout(Duration::from_secs(10)) + .pool_idle_timeout(Duration::from_secs(300)) + .pool_max_idle_per_host(1) + .build() +``` + +**Any Rust code using reqwest for tiny-payload HTTP/2 workloads — DoH, API polling, metric scraping — is probably hitting this.** + +## Fix #2 — Request hedging + +["The Tail at Scale"](https://research.google/pubs/pub40801/) (Dean & Barroso, 2013): fire a request, and if it doesn't return within your P50 latency, fire the same request in parallel. First response wins. + +The intuition: if 5% of requests spike due to independent random events, two parallel requests means only 0.25% of pairs spike on *both*. The tail collapses. + +**The surprise: hedging against the same upstream works.** HTTP/2 multiplexes streams — two `send_request()` calls on one connection become independent h2 streams. If one stalls in the dispatch channel, the other keeps making progress. + +```rust +pub async fn forward_with_hedging_raw( + wire: &[u8], + primary: &Upstream, + secondary: &Upstream, + hedge_delay: Duration, + timeout_duration: Duration, +) -> Result> { + let primary_fut = forward_query_raw(wire, primary, timeout_duration); + tokio::pin!(primary_fut); + let delay = sleep(hedge_delay); + tokio::pin!(delay); + + // Phase 1: wait for primary to return OR the hedge delay. + tokio::select! { + result = &mut primary_fut => return result, + _ = &mut delay => {} + } + + // Phase 2: hedge delay expired — fire secondary, keep primary alive. + let secondary_fut = forward_query_raw(wire, secondary, timeout_duration); + tokio::pin!(secondary_fut); + + // First successful response wins. + tokio::select! { + r = primary_fut => r, + r = secondary_fut => r, + } +} +``` + +The [production version](https://github.com/razvandimescu/numa/blob/main/src/forward.rs#L267) adds error handling — if one leg fails, it waits for the other. In production, Numa passes the same `&Upstream` twice when only one is configured. We extended hedging to all protocols — UDP (rescues packet loss on WiFi), DoT (rescues TLS handshake stalls). Configurable via `hedge_ms`; set to 0 to disable. + +**Caveat: hedging hurts on degraded networks.** When latency is consistently high (no random spikes, just slow), the hedge adds overhead with nothing to rescue. Hedging is a variance reducer, not a latency reducer — it only helps when spikes are *random*. + +--- + +## Forwarding results + +5 iterations × 101 domains × 10 rounds, 5,050 samples per method. Hickory-resolver included as a reference (it uses h2 directly, no dispatch channel): + +| | Single | **Hedged** | Hickory (ref) | +|---|---|---|---| +| mean | 17.4ms | **14.3ms** | 16.8ms | +| median | 10.4ms | **10.2ms** | 13.3ms | +| p95 | 52.5ms | **28.6ms** | 37.7ms | +| p99 | 113.4ms | **71.3ms** | 98.1ms | +| σ | 30.6ms | **13.2ms** | 19.1ms | + +The internal improvement: hedging cut p95 by 45%, p99 by 37%, σ by 57%. The exact margin vs hickory varies with network conditions; the σ reduction is consistent across runs. + +## Recursive resolution: from 2.3 seconds to 538ms + +Forwarding is one job. Recursive resolution — walking from root hints through TLD nameservers to the authoritative server — is a different one. We started 15× behind Unbound on cold recursive p99 and traced it to four root causes. + +**1. Missing NS delegation caching.** We cached glue records (ns1's IP) but not the delegation itself. Every `.com` query walked from root. Fix: cache NS records from referral authority sections. (10 lines) + +**2. Expired cache entries caused full cold resolutions.** Fix: serve-stale ([RFC 8767](https://www.rfc-editor.org/rfc/rfc8767)) — return expired entries with TTL=1 while revalidating in the background. (20 lines) + +**3. 1,900ms wasted per unreachable server.** 800ms UDP timeout + unconditional 1,500ms TCP fallback. Fix: 400ms UDP, TCP only for truncation. (5 lines) + +**4. Sequential NS queries on cold starts.** Fix: fire to the top 2 nameservers simultaneously. First response wins, SRTT recorded for both. Same hedging principle. (50 lines) + +
+
+
p99 before
+
2,367ms
+
+
+
+
p99 after
+
538ms
+
+
+
Unbound (ref)
+
748ms
+
+
+ +Genuine cold benchmarks — unique subdomains, 1 query per domain, 5 iterations, 505 samples per server: + +| | Baseline | Final | Unbound (ref) | +|---|---|---|---| +| p99 | 2,367ms | **538ms** | 748ms | +| σ | 254ms | **114ms** | 457ms | +| median | — | 77.6ms | 74.7ms | + +Unbound wins median by ~4% — its C implementation and 19 years of recursive optimization give it an edge on raw speed. It also has features we don't yet: aggressive NSEC caching ([RFC 8198](https://www.rfc-editor.org/rfc/rfc8198)) and a persistent infra cache. Where hedging shines is the tail — domains with slow or unreachable nameservers, where parallel queries turn worst-case sequential timeouts into races. + +Cache hits are tied across Numa, Unbound, and AdGuard Home — all serve at 0.1ms. + +--- + +## Takeaways + +The real hero of this post is Dean & Barroso. Hedging works because **spikes are random, and two random draws rarely both lose**. It's effective for any HTTP/2 client, any language, any forwarder topology. Nobody we know of ships it by default. + +If you're building a Rust service that makes many small HTTP/2 requests to the same backend: check your flow control window sizes first, then implement hedging. Don't rewrite the client. + +Benchmarks are in [`benches/recursive_compare.rs`](https://github.com/razvandimescu/numa/blob/main/benches/recursive_compare.rs) — run them yourself. If you're using reqwest for tiny-payload workloads and try the window size fix, I'd love to hear if you see the same improvement. + +--- + +Numa is a DNS resolver that runs on your laptop or phone. DoH, DoT, .numa local domains, ad blocking, developer overrides, a REST API, and all the optimization work in this post. [github.com/razvandimescu/numa](https://github.com/razvandimescu/numa). diff --git a/scripts/generate-blog-index.sh b/scripts/generate-blog-index.sh new file mode 100755 index 0000000..cacc033 --- /dev/null +++ b/scripts/generate-blog-index.sh @@ -0,0 +1,239 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Generate site/blog/index.html from blog/*.md frontmatter. +# Reads title, description, date from YAML frontmatter in each post. +# Sorts newest first (by date string — "April 2026" > "March 2026"). + +OUT="site/blog/index.html" + +# Extract frontmatter fields from a markdown file +extract() { + local file="$1" field="$2" + sed -n '/^---$/,/^---$/p' "$file" | grep "^${field}:" | sed "s/^${field}: *//" +} + +# Collect posts: "date|name|title|description" per line +posts="" +sources="blog/*.md" +if [ "${BLOG_INCLUDE_DRAFTS:-}" = "1" ] && ls drafts/*.md >/dev/null 2>&1; then + sources="blog/*.md drafts/*.md" +fi +for f in $sources; do + name=$(basename "$f" .md) + title=$(extract "$f" title) + desc=$(extract "$f" description) + date=$(extract "$f" date) + posts+="${date}|${name}|${title}|${desc}"$'\n' +done + +# Sort by ISO date (YYYY-MM-DD), newest first +posts=$(echo "$posts" | grep -v '^$' | sort -t'|' -k1 -r) + +# Format ISO date (YYYY-MM-DD) to "Month YYYY" +format_date() { + local months=(January February March April May June July August September October November December) + local y="${1%%-*}" + local m="${1#*-}"; m="${m%%-*}"; m=$((10#$m)) + echo "${months[$((m-1))]} $y" +} + +# Generate post list items +items="" +while IFS='|' read -r date name title desc; do + display_date=$(format_date "$date") + items+="
  • + +
    ${title}
    +
    ${desc}
    +
    ${display_date}
    +
    +
  • +" +done <<< "$posts" + +# Write the full index.html — style matches the existing hand-maintained version +cat > "$OUT" << HTMLEOF + + + + + +Blog — Numa + + + + + + + + +
    +

    Blog

    +
      +${items}
    +
    + + + + + +HTMLEOF + +echo " blog/index.html generated ($(echo "$posts" | wc -l | tr -d ' ') posts)" diff --git a/scripts/serve-site.sh b/scripts/serve-site.sh new file mode 100755 index 0000000..23854ff --- /dev/null +++ b/scripts/serve-site.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -euo pipefail + +PORT="${1:-9000}" + +if [[ "${1:-}" == "--drafts" ]] || [[ "${2:-}" == "--drafts" ]]; then + PORT="${PORT//--drafts/9000}" # default port if --drafts was first arg + make blog-drafts +else + make blog +fi + +echo "Serving site at http://localhost:$PORT" +cd site && python3 -m http.server "$PORT" diff --git a/site/blog-template.html b/site/blog-template.html index 54f0eae..8f8a825 100644 --- a/site/blog-template.html +++ b/site/blog-template.html @@ -267,9 +267,105 @@ body::before { .blog-footer a:hover { color: var(--amber); } /* --- Responsive --- */ +/* Hero metrics cards */ +.hero-metrics { + display: grid; + grid-template-columns: repeat(3, 1fr); + gap: 1rem; + margin: 2rem 0; +} +.metric-card { + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: 6px; + padding: 1.25rem; + text-align: center; +} +.metric-vs { + font-family: var(--font-mono); + font-size: 0.7rem; + letter-spacing: 0.08em; + text-transform: uppercase; + color: var(--text-dim); + margin-bottom: 0.5rem; +} +.metric-value { + font-family: var(--font-display); + font-size: 2.4rem; + font-weight: 400; + color: var(--amber); + line-height: 1.1; +} +.metric-label { + font-size: 0.82rem; + color: var(--text-secondary); + margin-top: 0.5rem; + line-height: 1.3; +} + +/* Before/after progression */ +.before-after { + display: flex; + align-items: center; + justify-content: center; + gap: 1.5rem; + margin: 2rem 0; + padding: 1.5rem; + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: 6px; +} +.ba-item { text-align: center; } +.ba-label { + font-family: var(--font-mono); + font-size: 0.7rem; + letter-spacing: 0.08em; + text-transform: uppercase; + color: var(--text-dim); + margin-bottom: 0.3rem; +} +.ba-value { + font-family: var(--font-display); + font-size: 1.8rem; + font-weight: 400; + color: var(--text-secondary); +} +.ba-before { + text-decoration: line-through; + text-decoration-color: rgba(192, 98, 58, 0.4); + color: var(--text-dim); +} +.ba-after { color: var(--amber); } +.ba-arrow { font-size: 1.5rem; color: var(--text-dim); } +.ba-ref { + border-left: 1px solid var(--border); + padding-left: 1.5rem; +} + +/* Spike highlight */ +.spike { + background: rgba(192, 98, 58, 0.12); + padding: 0.15em 0.5em; + border-radius: 3px; + font-weight: 600; + color: var(--amber-dim); +} + +/* Section dividers */ +.article hr { + border: none; + height: 1px; + background: var(--border); + margin: 3rem auto; + max-width: 120px; +} + @media (max-width: 640px) { .article { padding: 2rem 1.25rem 4rem; } .article pre { padding: 1rem; margin-left: -0.5rem; margin-right: -0.5rem; border-radius: 0; border-left: none; border-right: none; } + .hero-metrics { grid-template-columns: 1fr; } + .before-after { flex-direction: column; gap: 0.75rem; } + .ba-ref { border-left: none; border-top: 1px solid var(--border); padding-left: 0; padding-top: 0.75rem; } } diff --git a/site/blog/index.html b/site/blog/index.html index 993c166..d4df9e4 100644 --- a/site/blog/index.html +++ b/site/blog/index.html @@ -168,10 +168,17 @@ body::before {

    Blog

      +
    • + +
      Fixing DNS tail latency with a 5-line config and a 50-line function
      +
      We had periodic 40-140ms DoH spikes from hyper's dispatch channel. The fix was reqwest window tuning and request hedging — Dean & Barroso's "The Tail at Scale," applied to a DNS forwarder. Same ideas took our cold recursive p99 from 2.3 seconds to 538ms.
      + +
      +
    • DNS-over-TLS from Scratch in Rust
      -
      Building RFC 7858 on top of rustls — length-prefix framing, ALPN cross-protocol defense, iPhone dogfooding, and two bugs that only the strict clients caught.
      +
      Building RFC 7858 on top of rustls — length-prefix framing, ALPN cross-protocol defense, and two bugs that only the strict clients caught.
    • @@ -185,7 +192,7 @@ body::before {
    • I Built a DNS Resolver from Scratch in Rust
      -
      How DNS actually works at the wire level — label compression, TTL tricks, DoH implementation, and what I learned building a resolver with zero DNS libraries.
      +
      How DNS actually works at the wire level — label compression, TTL tricks, DoH, and what surprised me building a resolver with zero DNS libraries.
    • -- 2.34.1 From 908d076d9be98ec3545ef7f575c1b94106112d7c Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 14:37:24 +0300 Subject: [PATCH 051/139] blog: pain-first opening, I-voice, forward-looking close - Open with shared reqwest pain, not the tool name - Switch "we" to "I" for personal voice (playbook: solo dev > corporate) - Replace Unbound feature-gap excuses with what I'm exploring next (persistent SRTT, aggressive NSEC, adaptive hedge delays) - Add context line linking hero cards to the recursive section --- blog/fixing-doh-tail-latency.md | 26 ++++++++++++++------------ site/blog/index.html | 2 +- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/blog/fixing-doh-tail-latency.md b/blog/fixing-doh-tail-latency.md index 661c456..02d066c 100644 --- a/blog/fixing-doh-tail-latency.md +++ b/blog/fixing-doh-tail-latency.md @@ -1,10 +1,12 @@ --- title: Fixing DNS tail latency with a 5-line config and a 50-line function -description: We had periodic 40-140ms DoH spikes from hyper's dispatch channel. The fix was reqwest window tuning and request hedging — Dean & Barroso's "The Tail at Scale," applied to a DNS forwarder. Same ideas took our cold recursive p99 from 2.3 seconds to 538ms. +description: Periodic 40-140ms DoH spikes from hyper's dispatch channel. The fix was reqwest window tuning and request hedging — Dean & Barroso's "The Tail at Scale," applied to a DNS forwarder. Same ideas took cold recursive p99 from 2.3 seconds to 538ms. date: 2026-04-12 --- -Numa forwards DNS queries over HTTPS using reqwest. When we benchmarked the DoH path, we found periodic 40-140ms latency spikes every ~100ms of wall clock, in an otherwise ~10ms distribution. The tail was dragging our average — median 10ms, mean 23ms. +If you're using reqwest for small HTTP/2 payloads, you probably have a tail latency problem you don't know about. Hyper's default flow control windows are 10,000× oversized for anything under 1 KB, and its dispatch channel adds periodic 40-140ms stalls that don't show up in median benchmarks. + +I hit this building [Numa](https://github.com/razvandimescu/numa), a DNS resolver that forwards queries over HTTPS. Median was 10ms, mean was 23ms — the tail was dragging everything.
      @@ -24,21 +26,21 @@ Numa forwards DNS queries over HTTPS using reqwest. When we benchmarked the DoH
      -The fix was a 5-line reqwest config and a 50-line hedging function. This post is also an advertisement for Dean & Barroso's 2013 paper ["The Tail at Scale"](https://research.google/pubs/pub40801/) — a decade-old idea that still demolishes dispatch spikes. +The fix was a 5-line reqwest config and a 50-line hedging function. This post is also an advertisement for Dean & Barroso's 2013 paper ["The Tail at Scale"](https://research.google/pubs/pub40801/) — a decade-old idea that still demolishes dispatch spikes. The same ideas later took my cold recursive p99 from 2.3 seconds to 538ms. --- ## The cause: hyper's dispatch channel -Reqwest sits on top of hyper, which interposes an mpsc dispatch channel and a separate `ClientTask` between `.send()` and the h2 stream. We instrumented the forwarding path and confirmed: 100% of the spike time lives in the `send()` phase, and a parallel heartbeat task showed zero runtime lag during spikes. The tokio runtime was fine — the stall was internal to hyper's request scheduling. +Reqwest sits on top of hyper, which interposes an mpsc dispatch channel and a separate `ClientTask` between `.send()` and the h2 stream. I instrumented the forwarding path and confirmed: 100% of the spike time lives in the `send()` phase, and a parallel heartbeat task showed zero runtime lag during spikes. The tokio runtime was fine — the stall was internal to hyper's request scheduling. -Hickory-resolver doesn't have this issue. It holds `h2::SendRequest` directly and calls `ready().await; send_request()` in the caller's task — no channel, no scheduling dependency. We used it as a reference point throughout. +Hickory-resolver doesn't have this issue. It holds `h2::SendRequest` directly and calls `ready().await; send_request()` in the caller's task — no channel, no scheduling dependency. I used it as a reference point throughout. ## Fix #1 — HTTP/2 window sizes Reqwest inherits hyper's HTTP/2 defaults: 2 MB stream window, 5 MB connection window. For DNS responses (~200 bytes), that's ~10,000× oversized — unnecessary WINDOW_UPDATE frames, bloated bookkeeping on every poll, and different server-side scheduling behavior. -Setting both windows to the h2 spec default (64 KB) dropped our median from 13.3ms to 10.1ms: +Setting both windows to the h2 spec default (64 KB) dropped my median from 13.3ms to 10.1ms: ```rust reqwest::Client::builder() @@ -94,7 +96,7 @@ pub async fn forward_with_hedging_raw( } ``` -The [production version](https://github.com/razvandimescu/numa/blob/main/src/forward.rs#L267) adds error handling — if one leg fails, it waits for the other. In production, Numa passes the same `&Upstream` twice when only one is configured. We extended hedging to all protocols — UDP (rescues packet loss on WiFi), DoT (rescues TLS handshake stalls). Configurable via `hedge_ms`; set to 0 to disable. +The [production version](https://github.com/razvandimescu/numa/blob/main/src/forward.rs#L267) adds error handling — if one leg fails, it waits for the other. In production, Numa passes the same `&Upstream` twice when only one is configured. I extended hedging to all protocols — UDP (rescues packet loss on WiFi), DoT (rescues TLS handshake stalls). Configurable via `hedge_ms`; set to 0 to disable. **Caveat: hedging hurts on degraded networks.** When latency is consistently high (no random spikes, just slow), the hedge adds overhead with nothing to rescue. Hedging is a variance reducer, not a latency reducer — it only helps when spikes are *random*. @@ -116,13 +118,13 @@ The internal improvement: hedging cut p95 by 45%, p99 by 37%, σ by 57%. The exa ## Recursive resolution: from 2.3 seconds to 538ms -Forwarding is one job. Recursive resolution — walking from root hints through TLD nameservers to the authoritative server — is a different one. We started 15× behind Unbound on cold recursive p99 and traced it to four root causes. +Forwarding is one job. Recursive resolution — walking from root hints through TLD nameservers to the authoritative server — is a different one. I started 15× behind Unbound on cold recursive p99 and traced it to four root causes. -**1. Missing NS delegation caching.** We cached glue records (ns1's IP) but not the delegation itself. Every `.com` query walked from root. Fix: cache NS records from referral authority sections. (10 lines) +**1. Missing NS delegation caching.** I cached glue records (ns1's IP) but not the delegation itself. Every `.com` query walked from root. Fix: cache NS records from referral authority sections. (10 lines) **2. Expired cache entries caused full cold resolutions.** Fix: serve-stale ([RFC 8767](https://www.rfc-editor.org/rfc/rfc8767)) — return expired entries with TTL=1 while revalidating in the background. (20 lines) -**3. 1,900ms wasted per unreachable server.** 800ms UDP timeout + unconditional 1,500ms TCP fallback. Fix: 400ms UDP, TCP only for truncation. (5 lines) +**3. Wasting 1,900ms per unreachable server.** 800ms UDP timeout + unconditional 1,500ms TCP fallback. Fix: 400ms UDP, TCP only for truncation. (5 lines) **4. Sequential NS queries on cold starts.** Fix: fire to the top 2 nameservers simultaneously. First response wins, SRTT recorded for both. Same hedging principle. (50 lines) @@ -150,9 +152,9 @@ Genuine cold benchmarks — unique subdomains, 1 query per domain, 5 iterations, | σ | 254ms | **114ms** | 457ms | | median | — | 77.6ms | 74.7ms | -Unbound wins median by ~4% — its C implementation and 19 years of recursive optimization give it an edge on raw speed. It also has features we don't yet: aggressive NSEC caching ([RFC 8198](https://www.rfc-editor.org/rfc/rfc8198)) and a persistent infra cache. Where hedging shines is the tail — domains with slow or unreachable nameservers, where parallel queries turn worst-case sequential timeouts into races. +Unbound wins median by ~4%. Where hedging shines is the tail — domains with slow or unreachable nameservers, where parallel queries turn worst-case sequential timeouts into races. Cache hits are tied at 0.1ms across Numa, Unbound, and AdGuard Home. -Cache hits are tied across Numa, Unbound, and AdGuard Home — all serve at 0.1ms. +What I'm exploring next: persistent SRTT data across restarts (currently cold-starts lose all server timing), aggressive NSEC caching to shortcut negative lookups, and adaptive hedge delays that tune themselves to observed network conditions instead of a fixed 10ms. --- diff --git a/site/blog/index.html b/site/blog/index.html index d4df9e4..b11e182 100644 --- a/site/blog/index.html +++ b/site/blog/index.html @@ -171,7 +171,7 @@ body::before {
    • Fixing DNS tail latency with a 5-line config and a 50-line function
      -
      We had periodic 40-140ms DoH spikes from hyper's dispatch channel. The fix was reqwest window tuning and request hedging — Dean & Barroso's "The Tail at Scale," applied to a DNS forwarder. Same ideas took our cold recursive p99 from 2.3 seconds to 538ms.
      +
      Periodic 40-140ms DoH spikes from hyper's dispatch channel. The fix was reqwest window tuning and request hedging — Dean & Barroso's "The Tail at Scale," applied to a DNS forwarder. Same ideas took cold recursive p99 from 2.3 seconds to 538ms.
    • -- 2.34.1 From 75fe625f39acecd6a0676d29ab8e434d0548db70 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 14:48:34 +0300 Subject: [PATCH 052/139] blog: drop redundant Numa intro from opening paragraph --- blog/fixing-doh-tail-latency.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blog/fixing-doh-tail-latency.md b/blog/fixing-doh-tail-latency.md index 02d066c..54872aa 100644 --- a/blog/fixing-doh-tail-latency.md +++ b/blog/fixing-doh-tail-latency.md @@ -6,7 +6,7 @@ date: 2026-04-12 If you're using reqwest for small HTTP/2 payloads, you probably have a tail latency problem you don't know about. Hyper's default flow control windows are 10,000× oversized for anything under 1 KB, and its dispatch channel adds periodic 40-140ms stalls that don't show up in median benchmarks. -I hit this building [Numa](https://github.com/razvandimescu/numa), a DNS resolver that forwards queries over HTTPS. Median was 10ms, mean was 23ms — the tail was dragging everything. +I hit this building Numa's DoH forwarding path. Median was 10ms, mean was 23ms — the tail was dragging everything.
      -- 2.34.1 From 7cc110a0a1725664de283900bd89dcf8254d92e1 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 15:02:19 +0300 Subject: [PATCH 053/139] ci: skip CI and AUR builds for blog/site-only changes Add paths-ignore for site/, blog/, drafts/, *.md, and blog scripts so content-only pushes don't trigger cargo builds or AUR publishes. --- .github/workflows/ci.yml | 14 ++++++++++++++ .github/workflows/publish-aur.yml | 7 +++++++ 2 files changed, 21 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e0d06f9..0ad7e45 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,8 +3,22 @@ name: CI on: push: branches: [main] + paths-ignore: + - 'site/**' + - 'blog/**' + - 'drafts/**' + - '*.md' + - 'scripts/serve-site.sh' + - 'scripts/generate-blog-index.sh' pull_request: branches: [main] + paths-ignore: + - 'site/**' + - 'blog/**' + - 'drafts/**' + - '*.md' + - 'scripts/serve-site.sh' + - 'scripts/generate-blog-index.sh' env: CARGO_TERM_COLOR: always diff --git a/.github/workflows/publish-aur.yml b/.github/workflows/publish-aur.yml index 49275a0..6bd77e7 100644 --- a/.github/workflows/publish-aur.yml +++ b/.github/workflows/publish-aur.yml @@ -23,6 +23,13 @@ name: Publish - Arch Linux AUR Package on: push: branches: [main] + paths-ignore: + - 'site/**' + - 'blog/**' + - 'drafts/**' + - '*.md' + - 'scripts/serve-site.sh' + - 'scripts/generate-blog-index.sh' workflow_dispatch: permissions: -- 2.34.1 From 3b77dcff616345d260bdd47872a54125b413c8a5 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 15:48:29 +0300 Subject: [PATCH 054/139] =?UTF-8?q?feat:=20Docker=20support=20=E2=80=94=20?= =?UTF-8?q?multi-arch=20GHCR=20images=20on=20release?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add CI workflow to build linux/amd64 + linux/arm64 images and push to ghcr.io/razvandimescu/numa on tag. Fix Dockerfile (missing benches/), bake container-aware config (API + proxy bind 0.0.0.0), add Docker section to README. --- .github/workflows/docker.yml | 45 ++++++++++++++++++++++++++++++++++++ Dockerfile | 2 ++ README.md | 23 ++++++++++++++++++ 3 files changed, 70 insertions(+) create mode 100644 .github/workflows/docker.yml diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 0000000..04df96a --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,45 @@ +name: Docker + +on: + push: + tags: + - 'v*' + +permissions: + contents: read + packages: write + +jobs: + docker: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - uses: docker/setup-qemu-action@v3 + + - uses: docker/setup-buildx-action@v3 + + - uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - uses: docker/metadata-action@v5 + id: meta + with: + images: ghcr.io/${{ github.repository }} + tags: | + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=raw,value=latest + + - uses: docker/build-push-action@v6 + with: + context: . + platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/Dockerfile b/Dockerfile index e4ab8f5..466239d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,7 @@ RUN mkdir src && echo 'fn main() {}' > src/main.rs && echo '' > src/lib.rs RUN cargo build --release 2>/dev/null || true RUN rm -rf src COPY src/ src/ +COPY benches/ benches/ COPY site/ site/ COPY numa.toml com.numa.dns.plist numa.service ./ RUN touch src/main.rs src/lib.rs @@ -13,5 +14,6 @@ RUN cargo build --release FROM alpine:3.23 COPY --from=builder /app/target/release/numa /usr/local/bin/numa +RUN mkdir -p /root/.config/numa && printf '[server]\napi_bind_addr = "0.0.0.0"\n\n[proxy]\nenabled = true\nbind_addr = "0.0.0.0"\n' > /root/.config/numa/numa.toml EXPOSE 53/udp 80/tcp 443/tcp 853/tcp 5380/tcp ENTRYPOINT ["numa"] diff --git a/README.md b/README.md index 9979d46..1728461 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,9 @@ yay -S numa-git # Windows — download from GitHub Releases # All platforms cargo install numa + +# Docker +docker run -d --name numa --network host ghcr.io/razvandimescu/numa ``` ```bash @@ -102,6 +105,26 @@ From Machine B: `curl http://api.numa` → proxied to Machine A's port 8000. Ena **Hub mode**: run one instance with `bind_addr = "0.0.0.0:53"` and point other devices' DNS to it — they get ad blocking + `.numa` resolution without installing anything. +## Docker + +```bash +# Recommended — host networking (Linux) +docker run -d --name numa --network host ghcr.io/razvandimescu/numa + +# Port mapping (macOS/Windows Docker Desktop) +docker run -d --name numa -p 53:53/udp -p 53:53/tcp -p 5380:5380 ghcr.io/razvandimescu/numa +``` + +Dashboard at `http://localhost:5380`. The image binds the API and proxy to `0.0.0.0` by default. Override with a custom config: + +```bash +docker run -d --name numa --network host \ + -v /path/to/numa.toml:/root/.config/numa/numa.toml \ + ghcr.io/razvandimescu/numa +``` + +Multi-arch: `linux/amd64` and `linux/arm64`. + ## How It Compares | | Pi-hole | AdGuard Home | Unbound | Numa | -- 2.34.1 From 7dc1a0686f7dcfad6fa58550acd065db30810877 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 13 Apr 2026 15:58:52 +0300 Subject: [PATCH 055/139] fix: add llvm-libs to AUR makedepends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #97 — on minimal Arch installs, rustc fails with "error while loading shared libraries: libLLVM.so" because llvm-libs isn't pulled in transitively. --- PKGBUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PKGBUILD b/PKGBUILD index b3e3f6b..7081d9f 100644 --- a/PKGBUILD +++ b/PKGBUILD @@ -9,7 +9,7 @@ url="https://github.com/razvandimescu/numa" license=('MIT') options=('!lto') depends=('gcc-libs' 'glibc') -makedepends=('cargo' 'git') +makedepends=('cargo' 'git' 'llvm-libs') provides=("$_pkgname") conflicts=("$_pkgname") backup=('etc/numa.toml') -- 2.34.1 From b4b939c78bce38a8781c202be2eb10c798a6b68e Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 14 Apr 2026 09:22:24 +0300 Subject: [PATCH 056/139] fix: accept tls:// and https:// in [[forwarding]] upstreams Config-level forwarding rules were parsed with the UDP-only `parse_upstream_addr` helper, silently rejecting the DoT/DoH schemes that the rest of the forwarding pipeline already supports. Widen `ForwardingRule.upstream` from `SocketAddr` to `Upstream` so config rules reuse the same parser as `[upstream].address` and `fallback`. Demote `parse_upstream_addr` to `pub(crate)` to prevent the same mistake recurring. Closes #100. --- numa.toml | 8 ++++++++ src/config.rs | 44 ++++++++++++++++++++++++++++++++++++++++---- src/ctx.rs | 12 +++++++----- src/forward.rs | 11 ++++++++++- src/system_dns.rs | 15 ++++++++++----- 5 files changed, 75 insertions(+), 15 deletions(-) diff --git a/numa.toml b/numa.toml index 1ea3341..4edee81 100644 --- a/numa.toml +++ b/numa.toml @@ -58,6 +58,14 @@ api_port = 5380 # [[forwarding]] # suffix = ["home.local", "home.arpa"] # multiple suffixes → same upstream # upstream = "10.0.0.1" # port 53 default +# +# [[forwarding]] # DoT upstream: tls://IP[:port]#hostname +# suffix = ["google.com", "goog"] # hostname is the TLS SNI / cert name +# upstream = "tls://9.9.9.9#dns.quad9.net" # port 853 default +# +# [[forwarding]] # DoH upstream: full https:// URL +# suffix = "example.corp" +# upstream = "https://dns.quad9.net/dns-query" # [blocking] # enabled = true # set to false to disable ad blocking diff --git a/src/config.rs b/src/config.rs index 237f3bd..4d22956 100644 --- a/src/config.rs +++ b/src/config.rs @@ -46,12 +46,12 @@ pub struct ForwardingRuleConfig { impl ForwardingRuleConfig { fn to_runtime_rules(&self) -> Result> { - let addr = crate::forward::parse_upstream_addr(&self.upstream, 53) + let upstream = crate::forward::parse_upstream(&self.upstream, 53) .map_err(|e| format!("forwarding rule for upstream '{}': {}", self.upstream, e))?; Ok(self .suffix .iter() - .map(|s| crate::system_dns::ForwardingRule::new(s.clone(), addr)) + .map(|s| crate::system_dns::ForwardingRule::new(s.clone(), upstream.clone())) .collect()) } } @@ -710,6 +710,10 @@ mod tests { }; let runtime = rule.to_runtime_rules().unwrap(); assert_eq!(runtime.len(), 1); + assert!(matches!( + runtime[0].upstream, + crate::forward::Upstream::Udp(_) + )); assert_eq!(runtime[0].upstream.to_string(), "100.90.1.63:5361"); assert_eq!(runtime[0].suffix, "home.local"); } @@ -733,6 +737,38 @@ mod tests { assert!(rule.to_runtime_rules().is_err()); } + #[test] + fn forwarding_upstream_accepts_dot_scheme() { + let rule = ForwardingRuleConfig { + suffix: vec!["google.com".to_string()], + upstream: "tls://9.9.9.9#dns.quad9.net".to_string(), + }; + let runtime = rule + .to_runtime_rules() + .expect("tls:// upstream should parse"); + assert_eq!(runtime.len(), 1); + assert_eq!( + runtime[0].upstream.to_string(), + "tls://9.9.9.9:853#dns.quad9.net" + ); + } + + #[test] + fn forwarding_upstream_accepts_doh_scheme() { + let rule = ForwardingRuleConfig { + suffix: vec!["goog".to_string()], + upstream: "https://dns.quad9.net/dns-query".to_string(), + }; + let runtime = rule + .to_runtime_rules() + .expect("https:// upstream should parse"); + assert_eq!(runtime.len(), 1); + assert_eq!( + runtime[0].upstream.to_string(), + "https://dns.quad9.net/dns-query" + ); + } + #[test] fn forwarding_config_rules_take_precedence_over_discovered() { let config_rules = vec![ForwardingRuleConfig { @@ -741,7 +777,7 @@ mod tests { }]; let discovered = vec![crate::system_dns::ForwardingRule::new( "home.local".to_string(), - "192.168.1.1:53".parse().unwrap(), + crate::forward::Upstream::Udp("192.168.1.1:53".parse().unwrap()), )]; let merged = merge_forwarding_rules(&config_rules, discovered).unwrap(); let picked = crate::system_dns::match_forwarding_rule("host.home.local", &merged) @@ -757,7 +793,7 @@ mod tests { }]; let discovered = vec![crate::system_dns::ForwardingRule::new( "corp.example".to_string(), - "192.168.1.1:53".parse().unwrap(), + crate::forward::Upstream::Udp("192.168.1.1:53".parse().unwrap()), )]; let merged = merge_forwarding_rules(&config_rules, discovered).unwrap(); assert_eq!(merged.len(), 2); diff --git a/src/ctx.rs b/src/ctx.rs index 2812bed..222e407 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -190,13 +190,12 @@ pub async fn resolve_query( resp.header.authed_data = true; } (resp, QueryPath::Cached, cached_dnssec) - } else if let Some(fwd_addr) = + } else if let Some(upstream) = crate::system_dns::match_forwarding_rule(&qname, &ctx.forwarding_rules) { // Conditional forwarding takes priority over recursive mode // (e.g. Tailscale .ts.net, VPC private zones) - let upstream = Upstream::Udp(fwd_addr); - match forward_and_cache(raw_wire, &upstream, ctx, &qname, qtype).await { + match forward_and_cache(raw_wire, upstream, ctx, &qname, qtype).await { Ok(resp) => (resp, QueryPath::Forwarded, DnssecStatus::Indeterminate), Err(e) => { error!( @@ -1083,7 +1082,7 @@ mod tests { let mut ctx = crate::testutil::test_ctx().await; ctx.forwarding_rules = vec![ForwardingRule::new( "168.192.in-addr.arpa".to_string(), - upstream_addr, + Upstream::Udp(upstream_addr), )]; let ctx = Arc::new(ctx); @@ -1236,7 +1235,10 @@ mod tests { let upstream_addr = crate::testutil::mock_upstream(upstream_resp).await; let mut ctx = crate::testutil::test_ctx().await; - ctx.forwarding_rules = vec![ForwardingRule::new("corp".to_string(), upstream_addr)]; + ctx.forwarding_rules = vec![ForwardingRule::new( + "corp".to_string(), + Upstream::Udp(upstream_addr), + )]; let ctx = Arc::new(ctx); let (resp, path) = resolve_in_test(&ctx, "internal.corp", QueryType::A).await; diff --git a/src/forward.rs b/src/forward.rs index e13e360..7c7a53a 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -36,6 +36,12 @@ impl PartialEq for Upstream { } } +impl fmt::Debug for Upstream { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self, f) + } +} + impl fmt::Display for Upstream { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { @@ -49,7 +55,10 @@ impl fmt::Display for Upstream { } } -pub fn parse_upstream_addr(s: &str, default_port: u16) -> std::result::Result { +pub(crate) fn parse_upstream_addr( + s: &str, + default_port: u16, +) -> std::result::Result { // Try full socket addr first: "1.2.3.4:5353" or "[::1]:5353" if let Ok(addr) = s.parse::() { return Ok(addr); diff --git a/src/system_dns.rs b/src/system_dns.rs index d560a6e..96ae372 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -2,6 +2,8 @@ use std::net::SocketAddr; use log::info; +use crate::forward::Upstream; + fn print_recursive_hint() { let is_recursive = crate::config::load_config("numa.toml") .map(|c| c.config.upstream.mode == crate::config::UpstreamMode::Recursive) @@ -22,11 +24,11 @@ fn is_loopback_or_stub(addr: &str) -> bool { pub struct ForwardingRule { pub suffix: String, dot_suffix: String, // pre-computed ".suffix" for zero-alloc matching - pub upstream: SocketAddr, + pub upstream: Upstream, } impl ForwardingRule { - pub fn new(suffix: String, upstream: SocketAddr) -> Self { + pub fn new(suffix: String, upstream: Upstream) -> Self { let dot_suffix = format!(".{}", suffix); Self { suffix, @@ -233,7 +235,7 @@ fn discover_macos() -> SystemDnsInfo { #[cfg(any(target_os = "macos", target_os = "linux"))] fn make_rule(domain: &str, nameserver: &str) -> Option { let addr = crate::forward::parse_upstream_addr(nameserver, 53).ok()?; - Some(ForwardingRule::new(domain.to_string(), addr)) + Some(ForwardingRule::new(domain.to_string(), Upstream::Udp(addr))) } #[cfg(target_os = "linux")] @@ -822,10 +824,13 @@ fn uninstall_windows() -> Result<(), String> { /// Find the upstream for a domain by checking forwarding rules. /// Returns None if no rule matches (use default upstream). /// Zero-allocation on the hot path — dot_suffix is pre-computed. -pub fn match_forwarding_rule(domain: &str, rules: &[ForwardingRule]) -> Option { +pub fn match_forwarding_rule<'a>( + domain: &str, + rules: &'a [ForwardingRule], +) -> Option<&'a Upstream> { for rule in rules { if domain == rule.suffix || domain.ends_with(&rule.dot_suffix) { - return Some(rule.upstream); + return Some(&rule.upstream); } } None -- 2.34.1 From 120ba5200e62373d32f8ecdd37a9033b6a7718e0 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 14 Apr 2026 13:31:35 +0300 Subject: [PATCH 057/139] chore: bump version to 0.13.1 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dbbd921..c01e85f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1330,7 +1330,7 @@ dependencies = [ [[package]] name = "numa" -version = "0.13.0" +version = "0.13.1" dependencies = [ "arc-swap", "axum", diff --git a/Cargo.toml b/Cargo.toml index 19044ab..0b13af2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "numa" -version = "0.13.0" +version = "0.13.1" authors = ["razvandimescu "] edition = "2021" description = "Portable DNS resolver in Rust — .numa local domains, ad blocking, developer overrides, DNS-over-HTTPS" -- 2.34.1 From e0e0f50838892d93d2b36ac3c5f2a88f6ad50554 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 14 Apr 2026 18:18:32 +0300 Subject: [PATCH 058/139] feat: distinguish UPSTREAM vs FORWARD in logs and stats Queries matching a [[forwarding]] suffix rule now log as FORWARD; queries resolved via the default [upstream] pool log as UPSTREAM. Previously both paths shared the FORWARD label, making it impossible to tell from logs whether a rule matched. Adds QueryPath::Upstream, a queries.upstream stats counter exposed via /stats, plus a matching dashboard filter, bar, and path tag. Closes part of #102. --- site/dashboard.html | 6 +++++- src/api.rs | 2 ++ src/ctx.rs | 30 +++++++++++++++++++++++++++++- src/stats.rs | 14 +++++++++++++- 4 files changed, 49 insertions(+), 3 deletions(-) diff --git a/site/dashboard.html b/site/dashboard.html index 2d9cc60..d3837eb 100644 --- a/site/dashboard.html +++ b/site/dashboard.html @@ -217,6 +217,7 @@ body { min-width: 2px; } .path-bar-fill.forward { background: var(--amber); } +.path-bar-fill.upstream { background: var(--amber-dim); } .path-bar-fill.recursive { background: var(--cyan); } .path-bar-fill.cached { background: var(--teal); } .path-bar-fill.local { background: var(--violet); } @@ -285,6 +286,7 @@ body { font-weight: 500; } .path-tag.FORWARD { background: rgba(192, 98, 58, 0.12); color: var(--amber-dim); } +.path-tag.UPSTREAM { background: rgba(160, 120, 72, 0.12); color: var(--amber-dim); } .path-tag.RECURSIVE { background: rgba(74, 124, 138, 0.12); color: var(--cyan); } .path-tag.CACHED { background: rgba(107, 124, 78, 0.12); color: var(--teal-dim); } .path-tag.LOCAL { background: rgba(100, 116, 139, 0.12); color: var(--violet-dim); } @@ -655,6 +657,7 @@ body { + @@ -957,6 +960,7 @@ function encryptionPct(transport) { const PATH_DEFS = [ { key: 'forwarded', label: 'Forward', cls: 'forward' }, + { key: 'upstream', label: 'Upstream', cls: 'upstream' }, { key: 'recursive', label: 'Recursive', cls: 'recursive' }, { key: 'cached', label: 'Cached', cls: 'cached' }, { key: 'local', label: 'Local', cls: 'local' }, @@ -1209,7 +1213,7 @@ async function refresh() { prevTime = now; // Cache hit rate - const answered = q.cached + q.forwarded + q.recursive + q.coalesced + q.local + q.overridden; + const answered = q.cached + q.forwarded + q.upstream + q.recursive + q.coalesced + q.local + q.overridden; const hitRate = answered > 0 ? ((q.cached / answered) * 100).toFixed(1) : '0.0'; document.getElementById('cacheRate').textContent = hitRate + '%'; diff --git a/src/api.rs b/src/api.rs index 6ec3e48..17c4614 100644 --- a/src/api.rs +++ b/src/api.rs @@ -201,6 +201,7 @@ struct LanStatsResponse { struct QueriesStats { total: u64, forwarded: u64, + upstream: u64, recursive: u64, coalesced: u64, cached: u64, @@ -548,6 +549,7 @@ async fn stats(State(ctx): State>) -> Json { queries: QueriesStats { total: snap.total, forwarded: snap.forwarded, + upstream: snap.upstream, recursive: snap.recursive, coalesced: snap.coalesced, cached: snap.cached, diff --git a/src/ctx.rs b/src/ctx.rs index 222e407..b65f6c2 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -246,7 +246,7 @@ pub async fn resolve_query( .await { Ok(resp_wire) => match cache_and_parse(ctx, &qname, qtype, &resp_wire) { - Ok(resp) => (resp, QueryPath::Forwarded, DnssecStatus::Indeterminate), + Ok(resp) => (resp, QueryPath::Upstream, DnssecStatus::Indeterminate), Err(e) => { error!("{} | {:?} {} | PARSE ERROR | {}", src_addr, qtype, qname, e); ( @@ -1253,4 +1253,32 @@ mod tests { other => panic!("expected A record, got {:?}", other), } } + + #[tokio::test] + async fn pipeline_default_pool_reports_upstream_path() { + // No forwarding rule matches — query falls through to the default + // [upstream] pool. Path must be reported as Upstream (not Forwarded) + // so operators can distinguish [[forwarding]] hits from pool traffic. + let mut upstream_resp = DnsPacket::new(); + upstream_resp.header.response = true; + upstream_resp.header.rescode = ResultCode::NOERROR; + upstream_resp.answers.push(DnsRecord::A { + domain: "example.com".to_string(), + addr: Ipv4Addr::new(93, 184, 216, 34), + ttl: 300, + }); + let upstream_addr = crate::testutil::mock_upstream(upstream_resp).await; + + let mut ctx = crate::testutil::test_ctx().await; + ctx.upstream_pool = std::sync::Mutex::new(crate::forward::UpstreamPool::new( + vec![Upstream::Udp(upstream_addr)], + vec![], + )); + let ctx = Arc::new(ctx); + + let (resp, path) = resolve_in_test(&ctx, "example.com", QueryType::A).await; + assert_eq!(path, QueryPath::Upstream); + assert_eq!(resp.header.rescode, ResultCode::NOERROR); + assert_eq!(resp.answers.len(), 1); + } } diff --git a/src/stats.rs b/src/stats.rs index feae945..df9127c 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -90,6 +90,7 @@ fn linux_rss() -> usize { pub struct ServerStats { queries_total: u64, queries_forwarded: u64, + queries_upstream: u64, queries_recursive: u64, queries_coalesced: u64, queries_cached: u64, @@ -127,7 +128,10 @@ impl Transport { pub enum QueryPath { Local, Cached, + /// Matched a `[[forwarding]]` suffix rule. Forwarded, + /// Resolved via the default `[upstream]` pool (no suffix match). + Upstream, Recursive, Coalesced, Blocked, @@ -141,6 +145,7 @@ impl QueryPath { QueryPath::Local => "LOCAL", QueryPath::Cached => "CACHED", QueryPath::Forwarded => "FORWARD", + QueryPath::Upstream => "UPSTREAM", QueryPath::Recursive => "RECURSIVE", QueryPath::Coalesced => "COALESCED", QueryPath::Blocked => "BLOCKED", @@ -156,6 +161,8 @@ impl QueryPath { Some(QueryPath::Cached) } else if s.eq_ignore_ascii_case("FORWARD") { Some(QueryPath::Forwarded) + } else if s.eq_ignore_ascii_case("UPSTREAM") { + Some(QueryPath::Upstream) } else if s.eq_ignore_ascii_case("RECURSIVE") { Some(QueryPath::Recursive) } else if s.eq_ignore_ascii_case("COALESCED") { @@ -183,6 +190,7 @@ impl ServerStats { ServerStats { queries_total: 0, queries_forwarded: 0, + queries_upstream: 0, queries_recursive: 0, queries_coalesced: 0, queries_cached: 0, @@ -204,6 +212,7 @@ impl ServerStats { QueryPath::Local => self.queries_local += 1, QueryPath::Cached => self.queries_cached += 1, QueryPath::Forwarded => self.queries_forwarded += 1, + QueryPath::Upstream => self.queries_upstream += 1, QueryPath::Recursive => self.queries_recursive += 1, QueryPath::Coalesced => self.queries_coalesced += 1, QueryPath::Blocked => self.queries_blocked += 1, @@ -232,6 +241,7 @@ impl ServerStats { uptime_secs: self.uptime_secs(), total: self.queries_total, forwarded: self.queries_forwarded, + upstream: self.queries_upstream, recursive: self.queries_recursive, coalesced: self.queries_coalesced, cached: self.queries_cached, @@ -253,10 +263,11 @@ impl ServerStats { let secs = uptime.as_secs() % 60; log::info!( - "STATS | uptime {}h{}m{}s | total {} | fwd {} | recursive {} | coalesced {} | cached {} | local {} | override {} | blocked {} | errors {}", + "STATS | uptime {}h{}m{}s | total {} | fwd {} | upstream {} | recursive {} | coalesced {} | cached {} | local {} | override {} | blocked {} | errors {}", hours, mins, secs, self.queries_total, self.queries_forwarded, + self.queries_upstream, self.queries_recursive, self.queries_coalesced, self.queries_cached, @@ -272,6 +283,7 @@ pub struct StatsSnapshot { pub uptime_secs: u64, pub total: u64, pub forwarded: u64, + pub upstream: u64, pub recursive: u64, pub coalesced: u64, pub cached: u64, -- 2.34.1 From ebb2a5db392b3bbc205afbbeecff35c9925209dc Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 14 Apr 2026 18:26:45 +0300 Subject: [PATCH 059/139] =?UTF-8?q?refactor:=20simplify=20upstream-path=20?= =?UTF-8?q?test=20=E2=80=94=20reuse=20pool=20mutex,=20drop=20narrating=20c?= =?UTF-8?q?omment?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/ctx.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index b65f6c2..eeca407 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -1256,9 +1256,6 @@ mod tests { #[tokio::test] async fn pipeline_default_pool_reports_upstream_path() { - // No forwarding rule matches — query falls through to the default - // [upstream] pool. Path must be reported as Upstream (not Forwarded) - // so operators can distinguish [[forwarding]] hits from pool traffic. let mut upstream_resp = DnsPacket::new(); upstream_resp.header.response = true; upstream_resp.header.rescode = ResultCode::NOERROR; @@ -1269,11 +1266,11 @@ mod tests { }); let upstream_addr = crate::testutil::mock_upstream(upstream_resp).await; - let mut ctx = crate::testutil::test_ctx().await; - ctx.upstream_pool = std::sync::Mutex::new(crate::forward::UpstreamPool::new( - vec![Upstream::Udp(upstream_addr)], - vec![], - )); + let ctx = crate::testutil::test_ctx().await; + ctx.upstream_pool + .lock() + .unwrap() + .set_primary(vec![Upstream::Udp(upstream_addr)]); let ctx = Arc::new(ctx); let (resp, path) = resolve_in_test(&ctx, "example.com", QueryType::A).await; -- 2.34.1 From 4bd08e206db25c99b2da008aab4a4bfb203ccf51 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 14 Apr 2026 21:25:11 +0300 Subject: [PATCH 060/139] feat(dashboard): hide zero-count path and transport rows --- site/dashboard.html | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/site/dashboard.html b/site/dashboard.html index d3837eb..77018fc 100644 --- a/site/dashboard.html +++ b/site/dashboard.html @@ -939,10 +939,12 @@ function renderMemory(mem, stats) { function renderBarChart(containerId, defs, data, total) { total = total || 1; - document.getElementById(containerId).innerHTML = defs.map(d => { - const count = data[d.key] || 0; - const pct = ((count / total) * 100).toFixed(1); - return ` + document.getElementById(containerId).innerHTML = defs + .filter(d => (data[d.key] || 0) > 0) + .map(d => { + const count = data[d.key] || 0; + const pct = ((count / total) * 100).toFixed(1); + return `
      ${d.label}
      @@ -950,7 +952,7 @@ function renderBarChart(containerId, defs, data, total) {
      ${pct}%
      `; - }).join(''); + }).join(''); } function encryptionPct(transport) { -- 2.34.1 From 9a0d586b138765fa3f5b82861734f6da1dec8557 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 15 Apr 2026 04:03:38 +0300 Subject: [PATCH 061/139] feat: accept array of upstreams in [[forwarding]] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors `[upstream] address` — `upstream` accepts string or array of strings, builds an `UpstreamPool` and routes queries through `forward_with_failover_raw` so SRTT ordering and failover apply to matched `[[forwarding]]` rules the same way they do for the default pool. Single-string rules keep their current behavior (one-element pool, equivalent single-upstream path). Empty array errors at config load. Addresses item 1 of issue #102. Plan: docs/102_item1.md. --- numa.toml | 7 +++ src/config.rs | 119 +++++++++++++++++++++++++++++++++++----------- src/ctx.rs | 77 +++++++++++++++++++++++------- src/forward.rs | 2 +- src/main.rs | 6 ++- src/system_dns.rs | 14 +++--- 6 files changed, 172 insertions(+), 53 deletions(-) diff --git a/numa.toml b/numa.toml index 4edee81..ebb9720 100644 --- a/numa.toml +++ b/numa.toml @@ -66,6 +66,13 @@ api_port = 5380 # [[forwarding]] # DoH upstream: full https:// URL # suffix = "example.corp" # upstream = "https://dns.quad9.net/dns-query" +# +# [[forwarding]] # array of upstreams → SRTT-aware failover +# suffix = ["google.com", "goog"] # fastest-healthy first, dead one skipped +# upstream = [ +# "tls://9.9.9.9#dns.quad9.net", +# "tls://149.112.112.112#dns.quad9.net", +# ] # [blocking] # enabled = true # set to false to disable ad blocking diff --git a/src/config.rs b/src/config.rs index 4d22956..90d1ba3 100644 --- a/src/config.rs +++ b/src/config.rs @@ -41,17 +41,30 @@ pub struct Config { pub struct ForwardingRuleConfig { #[serde(deserialize_with = "string_or_vec")] pub suffix: Vec, - pub upstream: String, + #[serde(deserialize_with = "string_or_vec")] + pub upstream: Vec, } impl ForwardingRuleConfig { fn to_runtime_rules(&self) -> Result> { - let upstream = crate::forward::parse_upstream(&self.upstream, 53) - .map_err(|e| format!("forwarding rule for upstream '{}': {}", self.upstream, e))?; + if self.upstream.is_empty() { + return Err(format!( + "forwarding rule for suffix {:?}: upstream must not be empty", + self.suffix + ) + .into()); + } + let mut primary = Vec::with_capacity(self.upstream.len()); + for s in &self.upstream { + let u = crate::forward::parse_upstream(s, 53) + .map_err(|e| format!("forwarding rule for upstream '{}': {}", s, e))?; + primary.push(u); + } + let pool = crate::forward::UpstreamPool::new(primary, vec![]); Ok(self .suffix .iter() - .map(|s| crate::system_dns::ForwardingRule::new(s.clone(), upstream.clone())) + .map(|s| crate::system_dns::ForwardingRule::new(s.clone(), pool.clone())) .collect()) } } @@ -643,7 +656,7 @@ mod tests { let config: Config = toml::from_str(toml).unwrap(); assert_eq!(config.forwarding.len(), 1); assert_eq!(config.forwarding[0].suffix, &["home.local"]); - assert_eq!(config.forwarding[0].upstream, "100.90.1.63:5361"); + assert_eq!(config.forwarding[0].upstream, vec!["100.90.1.63:5361"]); } #[test] @@ -671,7 +684,7 @@ mod tests { "#; let config: Config = toml::from_str(toml).unwrap(); assert_eq!(config.forwarding.len(), 2); - assert_eq!(config.forwarding[1].upstream, "10.0.0.1"); + assert_eq!(config.forwarding[1].upstream, vec!["10.0.0.1"]); } #[test] @@ -693,28 +706,29 @@ mod tests { fn forwarding_suffix_array_expands_to_multiple_runtime_rules() { let rule = ForwardingRuleConfig { suffix: vec!["168.192.in-addr.arpa".to_string(), "onsite".to_string()], - upstream: "192.168.88.1".to_string(), + upstream: vec!["192.168.88.1".to_string()], }; let runtime = rule.to_runtime_rules().unwrap(); assert_eq!(runtime.len(), 2); assert_eq!(runtime[0].suffix, "168.192.in-addr.arpa"); assert_eq!(runtime[1].suffix, "onsite"); - assert_eq!(runtime[0].upstream, runtime[1].upstream); + assert_eq!( + runtime[0].upstream.preferred(), + runtime[1].upstream.preferred() + ); } #[test] fn forwarding_upstream_with_explicit_port() { let rule = ForwardingRuleConfig { suffix: vec!["home.local".to_string()], - upstream: "100.90.1.63:5361".to_string(), + upstream: vec!["100.90.1.63:5361".to_string()], }; let runtime = rule.to_runtime_rules().unwrap(); assert_eq!(runtime.len(), 1); - assert!(matches!( - runtime[0].upstream, - crate::forward::Upstream::Udp(_) - )); - assert_eq!(runtime[0].upstream.to_string(), "100.90.1.63:5361"); + let preferred = runtime[0].upstream.preferred().unwrap(); + assert!(matches!(preferred, crate::forward::Upstream::Udp(_))); + assert_eq!(preferred.to_string(), "100.90.1.63:5361"); assert_eq!(runtime[0].suffix, "home.local"); } @@ -722,17 +736,20 @@ mod tests { fn forwarding_upstream_defaults_to_port_53() { let rule = ForwardingRuleConfig { suffix: vec!["home.local".to_string()], - upstream: "100.90.1.63".to_string(), + upstream: vec!["100.90.1.63".to_string()], }; let runtime = rule.to_runtime_rules().unwrap(); - assert_eq!(runtime[0].upstream.to_string(), "100.90.1.63:53"); + assert_eq!( + runtime[0].upstream.preferred().unwrap().to_string(), + "100.90.1.63:53" + ); } #[test] fn forwarding_invalid_upstream_returns_error() { let rule = ForwardingRuleConfig { suffix: vec!["home.local".to_string()], - upstream: "not-a-valid-host".to_string(), + upstream: vec!["not-a-valid-host".to_string()], }; assert!(rule.to_runtime_rules().is_err()); } @@ -741,14 +758,14 @@ mod tests { fn forwarding_upstream_accepts_dot_scheme() { let rule = ForwardingRuleConfig { suffix: vec!["google.com".to_string()], - upstream: "tls://9.9.9.9#dns.quad9.net".to_string(), + upstream: vec!["tls://9.9.9.9#dns.quad9.net".to_string()], }; let runtime = rule .to_runtime_rules() .expect("tls:// upstream should parse"); assert_eq!(runtime.len(), 1); assert_eq!( - runtime[0].upstream.to_string(), + runtime[0].upstream.preferred().unwrap().to_string(), "tls://9.9.9.9:853#dns.quad9.net" ); } @@ -757,14 +774,14 @@ mod tests { fn forwarding_upstream_accepts_doh_scheme() { let rule = ForwardingRuleConfig { suffix: vec!["goog".to_string()], - upstream: "https://dns.quad9.net/dns-query".to_string(), + upstream: vec!["https://dns.quad9.net/dns-query".to_string()], }; let runtime = rule .to_runtime_rules() .expect("https:// upstream should parse"); assert_eq!(runtime.len(), 1); assert_eq!( - runtime[0].upstream.to_string(), + runtime[0].upstream.preferred().unwrap().to_string(), "https://dns.quad9.net/dns-query" ); } @@ -773,44 +790,90 @@ mod tests { fn forwarding_config_rules_take_precedence_over_discovered() { let config_rules = vec![ForwardingRuleConfig { suffix: vec!["home.local".to_string()], - upstream: "10.0.0.1:53".to_string(), + upstream: vec!["10.0.0.1:53".to_string()], }]; let discovered = vec![crate::system_dns::ForwardingRule::new( "home.local".to_string(), - crate::forward::Upstream::Udp("192.168.1.1:53".parse().unwrap()), + crate::forward::UpstreamPool::new( + vec![crate::forward::Upstream::Udp( + "192.168.1.1:53".parse().unwrap(), + )], + vec![], + ), )]; let merged = merge_forwarding_rules(&config_rules, discovered).unwrap(); let picked = crate::system_dns::match_forwarding_rule("host.home.local", &merged) .expect("rule should match"); - assert_eq!(picked.to_string(), "10.0.0.1:53"); + assert_eq!(picked.preferred().unwrap().to_string(), "10.0.0.1:53"); } #[test] fn forwarding_merge_preserves_non_overlapping_discovered() { let config_rules = vec![ForwardingRuleConfig { suffix: vec!["home.local".to_string()], - upstream: "10.0.0.1:53".to_string(), + upstream: vec!["10.0.0.1:53".to_string()], }]; let discovered = vec![crate::system_dns::ForwardingRule::new( "corp.example".to_string(), - crate::forward::Upstream::Udp("192.168.1.1:53".parse().unwrap()), + crate::forward::UpstreamPool::new( + vec![crate::forward::Upstream::Udp( + "192.168.1.1:53".parse().unwrap(), + )], + vec![], + ), )]; let merged = merge_forwarding_rules(&config_rules, discovered).unwrap(); assert_eq!(merged.len(), 2); let picked = crate::system_dns::match_forwarding_rule("host.corp.example", &merged) .expect("discovered rule should still match"); - assert_eq!(picked.to_string(), "192.168.1.1:53"); + assert_eq!(picked.preferred().unwrap().to_string(), "192.168.1.1:53"); } #[test] fn forwarding_merge_suffix_array_expands_to_multiple_rules() { let config_rules = vec![ForwardingRuleConfig { suffix: vec!["a.local".to_string(), "b.local".to_string()], - upstream: "10.0.0.1:53".to_string(), + upstream: vec!["10.0.0.1:53".to_string()], }]; let merged = merge_forwarding_rules(&config_rules, vec![]).unwrap(); assert_eq!(merged.len(), 2); } + + #[test] + fn forwarding_parses_upstream_array() { + let toml = r#" + [[forwarding]] + suffix = "google.com" + upstream = ["tls://9.9.9.9#dns.quad9.net", "tls://149.112.112.112#dns.quad9.net"] + "#; + let config: Config = toml::from_str(toml).unwrap(); + assert_eq!(config.forwarding.len(), 1); + assert_eq!(config.forwarding[0].upstream.len(), 2); + } + + #[test] + fn forwarding_upstream_array_builds_pool_with_multiple_primaries() { + let rule = ForwardingRuleConfig { + suffix: vec!["google.com".to_string()], + upstream: vec![ + "tls://9.9.9.9#dns.quad9.net".to_string(), + "tls://149.112.112.112#dns.quad9.net".to_string(), + ], + }; + let runtime = rule.to_runtime_rules().unwrap(); + assert_eq!(runtime.len(), 1); + let label = runtime[0].upstream.label(); + assert!(label.contains("+1 more"), "label was: {}", label); + } + + #[test] + fn forwarding_empty_upstream_array_errors() { + let rule = ForwardingRuleConfig { + suffix: vec!["home.local".to_string()], + upstream: vec![], + }; + assert!(rule.to_runtime_rules().is_err()); + } } pub struct ConfigLoad { diff --git a/src/ctx.rs b/src/ctx.rs index 222e407..6467620 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -16,7 +16,9 @@ use crate::blocklist::BlocklistStore; use crate::buffer::BytePacketBuffer; use crate::cache::{DnsCache, DnssecStatus}; use crate::config::{UpstreamMode, ZoneMap}; -use crate::forward::{forward_query_raw, forward_with_failover_raw, Upstream, UpstreamPool}; +use crate::forward::{forward_with_failover_raw, UpstreamPool}; +#[cfg(test)] +use crate::forward::Upstream; use crate::header::ResultCode; use crate::health::HealthMeta; use crate::lan::PeerStore; @@ -190,13 +192,31 @@ pub async fn resolve_query( resp.header.authed_data = true; } (resp, QueryPath::Cached, cached_dnssec) - } else if let Some(upstream) = + } else if let Some(pool) = crate::system_dns::match_forwarding_rule(&qname, &ctx.forwarding_rules) { // Conditional forwarding takes priority over recursive mode // (e.g. Tailscale .ts.net, VPC private zones) - match forward_and_cache(raw_wire, upstream, ctx, &qname, qtype).await { - Ok(resp) => (resp, QueryPath::Forwarded, DnssecStatus::Indeterminate), + match forward_with_failover_raw( + raw_wire, + pool, + &ctx.srtt, + ctx.timeout, + ctx.hedge_delay, + ) + .await + { + Ok(resp_wire) => match cache_and_parse(ctx, &qname, qtype, &resp_wire) { + Ok(resp) => (resp, QueryPath::Forwarded, DnssecStatus::Indeterminate), + Err(e) => { + error!("{} | {:?} {} | PARSE ERROR | {}", src_addr, qtype, qname, e); + ( + DnsPacket::response_from(&query, ResultCode::SERVFAIL), + QueryPath::UpstreamError, + DnssecStatus::Indeterminate, + ) + } + }, Err(e) => { error!( "{} | {:?} {} | FORWARD ERROR | {}", @@ -433,17 +453,6 @@ pub async fn refresh_entry(ctx: &ServerCtx, qname: &str, qtype: QueryType) { } } -async fn forward_and_cache( - wire: &[u8], - upstream: &Upstream, - ctx: &ServerCtx, - qname: &str, - qtype: QueryType, -) -> crate::Result { - let resp_wire = forward_query_raw(wire, upstream, ctx.timeout).await?; - cache_and_parse(ctx, qname, qtype, &resp_wire) -} - pub async fn handle_query( mut buffer: BytePacketBuffer, raw_len: usize, @@ -1082,7 +1091,7 @@ mod tests { let mut ctx = crate::testutil::test_ctx().await; ctx.forwarding_rules = vec![ForwardingRule::new( "168.192.in-addr.arpa".to_string(), - Upstream::Udp(upstream_addr), + UpstreamPool::new(vec![Upstream::Udp(upstream_addr)], vec![]), )]; let ctx = Arc::new(ctx); @@ -1237,7 +1246,7 @@ mod tests { let mut ctx = crate::testutil::test_ctx().await; ctx.forwarding_rules = vec![ForwardingRule::new( "corp".to_string(), - Upstream::Udp(upstream_addr), + UpstreamPool::new(vec![Upstream::Udp(upstream_addr)], vec![]), )]; let ctx = Arc::new(ctx); @@ -1253,4 +1262,38 @@ mod tests { other => panic!("expected A record, got {:?}", other), } } + + #[tokio::test] + async fn pipeline_forwarding_fails_over_to_second_upstream() { + let dead = crate::testutil::blackhole_upstream(); + + let mut live_resp = DnsPacket::new(); + live_resp.header.response = true; + live_resp.header.rescode = ResultCode::NOERROR; + live_resp.answers.push(DnsRecord::A { + domain: "internal.corp".to_string(), + addr: Ipv4Addr::new(10, 9, 9, 9), + ttl: 600, + }); + let live = crate::testutil::mock_upstream(live_resp).await; + + let mut ctx = crate::testutil::test_ctx().await; + ctx.forwarding_rules = vec![ForwardingRule::new( + "corp".to_string(), + UpstreamPool::new( + vec![Upstream::Udp(dead), Upstream::Udp(live)], + vec![], + ), + )]; + let ctx = Arc::new(ctx); + + let (resp, path) = resolve_in_test(&ctx, "internal.corp", QueryType::A).await; + assert_eq!(path, QueryPath::Forwarded); + assert_eq!(resp.header.rescode, ResultCode::NOERROR); + assert_eq!(resp.answers.len(), 1); + match &resp.answers[0] { + DnsRecord::A { addr, .. } => assert_eq!(*addr, Ipv4Addr::new(10, 9, 9, 9)), + other => panic!("expected A record, got {:?}", other), + } + } } diff --git a/src/forward.rs b/src/forward.rs index 7c7a53a..8bb548e 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -118,7 +118,7 @@ fn build_dot_connector() -> Result { ))) } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct UpstreamPool { primary: Vec, fallback: Vec, diff --git a/src/main.rs b/src/main.rs index bce7add..529d40e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -212,7 +212,11 @@ async fn main() -> numa::Result<()> { for fwd in &config.forwarding { for suffix in &fwd.suffix { - info!("forwarding .{} to {} (config rule)", suffix, fwd.upstream); + info!( + "forwarding .{} to {} (config rule)", + suffix, + fwd.upstream.join(", ") + ); } } let forwarding_rules = diff --git a/src/system_dns.rs b/src/system_dns.rs index 96ae372..8b1c4ed 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -2,7 +2,7 @@ use std::net::SocketAddr; use log::info; -use crate::forward::Upstream; +use crate::forward::{Upstream, UpstreamPool}; fn print_recursive_hint() { let is_recursive = crate::config::load_config("numa.toml") @@ -24,11 +24,11 @@ fn is_loopback_or_stub(addr: &str) -> bool { pub struct ForwardingRule { pub suffix: String, dot_suffix: String, // pre-computed ".suffix" for zero-alloc matching - pub upstream: Upstream, + pub upstream: UpstreamPool, } impl ForwardingRule { - pub fn new(suffix: String, upstream: Upstream) -> Self { + pub fn new(suffix: String, upstream: UpstreamPool) -> Self { let dot_suffix = format!(".{}", suffix); Self { suffix, @@ -216,7 +216,8 @@ fn discover_macos() -> SystemDnsInfo { for rule in &rules { info!( "auto-discovered forwarding: *.{} -> {}", - rule.suffix, rule.upstream + rule.suffix, + rule.upstream.label() ); } if rules.is_empty() { @@ -235,7 +236,8 @@ fn discover_macos() -> SystemDnsInfo { #[cfg(any(target_os = "macos", target_os = "linux"))] fn make_rule(domain: &str, nameserver: &str) -> Option { let addr = crate::forward::parse_upstream_addr(nameserver, 53).ok()?; - Some(ForwardingRule::new(domain.to_string(), Upstream::Udp(addr))) + let pool = UpstreamPool::new(vec![Upstream::Udp(addr)], vec![]); + Some(ForwardingRule::new(domain.to_string(), pool)) } #[cfg(target_os = "linux")] @@ -827,7 +829,7 @@ fn uninstall_windows() -> Result<(), String> { pub fn match_forwarding_rule<'a>( domain: &str, rules: &'a [ForwardingRule], -) -> Option<&'a Upstream> { +) -> Option<&'a UpstreamPool> { for rule in rules { if domain == rule.suffix || domain.ends_with(&rule.dot_suffix) { return Some(&rule.upstream); -- 2.34.1 From fef43635d61766a3f8b638acf33302044fa79905 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 15 Apr 2026 04:11:27 +0300 Subject: [PATCH 062/139] fix(ci): rustfmt import order and gate Upstream import for Windows --- src/ctx.rs | 7 ++----- src/system_dns.rs | 4 +++- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index 6467620..e339e81 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -16,9 +16,9 @@ use crate::blocklist::BlocklistStore; use crate::buffer::BytePacketBuffer; use crate::cache::{DnsCache, DnssecStatus}; use crate::config::{UpstreamMode, ZoneMap}; -use crate::forward::{forward_with_failover_raw, UpstreamPool}; #[cfg(test)] use crate::forward::Upstream; +use crate::forward::{forward_with_failover_raw, UpstreamPool}; use crate::header::ResultCode; use crate::health::HealthMeta; use crate::lan::PeerStore; @@ -1280,10 +1280,7 @@ mod tests { let mut ctx = crate::testutil::test_ctx().await; ctx.forwarding_rules = vec![ForwardingRule::new( "corp".to_string(), - UpstreamPool::new( - vec![Upstream::Udp(dead), Upstream::Udp(live)], - vec![], - ), + UpstreamPool::new(vec![Upstream::Udp(dead), Upstream::Udp(live)], vec![]), )]; let ctx = Arc::new(ctx); diff --git a/src/system_dns.rs b/src/system_dns.rs index 8b1c4ed..a450e01 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -2,7 +2,9 @@ use std::net::SocketAddr; use log::info; -use crate::forward::{Upstream, UpstreamPool}; +#[cfg(any(target_os = "macos", target_os = "linux"))] +use crate::forward::Upstream; +use crate::forward::UpstreamPool; fn print_recursive_hint() { let is_recursive = crate::config::load_config("numa.toml") -- 2.34.1 From b403671e11cb6669015c8a48dc1aebede3661385 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 15 Apr 2026 14:27:17 +0300 Subject: [PATCH 063/139] chore(deps): bump rustls-webpki to 0.103.12 Patches RUSTSEC-2026-0098 (URI name constraints incorrectly accepted) and RUSTSEC-2026-0099 (wildcard cert name constraints), both published 2026-04-14. Transitive via reqwest / rustls / hickory / quinn. --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c01e85f..9cd1b7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1834,9 +1834,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.10" +version = "0.103.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" +checksum = "8279bb85272c9f10811ae6a6c547ff594d6a7f3c6c6b02ee9726d1d0dcfcdd06" dependencies = [ "aws-lc-rs", "ring", -- 2.34.1 From cea4b0ef8842a9c061266701d55f298906a5be71 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 15 Apr 2026 22:14:36 +0300 Subject: [PATCH 064/139] feat(windows): add windows-service crate + SCM dispatcher scaffold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lets numa.exe act as a real Windows service registered with the SCM, replacing the HKLM\...\Run login-time autostart that runs in the user session without stderr capture. - New `numa::windows_service` module (cfg(windows)) wraps Mullvad's `windows-service` crate: registers with SCM, reports Running, handles Stop/Shutdown, reports Stopped. - `numa.exe --service` is the entry point SCM uses (`sc create … binPath="numa.exe --service"`); interactive invocations are unchanged. - Dep is gated `[target.'cfg(windows)'.dependencies]` — zero impact on macOS/Linux builds or binary size. Scaffold only. The service currently blocks on an mpsc channel until Stop arrives; the actual serve loop will hook in once main.rs's inline server body is extracted into `numa::serve(config_path)` in a follow-up. This lets `sc start Numa` / `sc stop Numa` be verified end to end today. --- Cargo.lock | 12 ++++++ Cargo.toml | 3 ++ src/lib.rs | 3 ++ src/main.rs | 8 ++++ src/windows_service.rs | 85 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 111 insertions(+) create mode 100644 src/windows_service.rs diff --git a/Cargo.lock b/Cargo.lock index 9cd1b7d..cf25b3a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1359,6 +1359,7 @@ dependencies = [ "toml", "tower", "webpki-roots 1.0.6", + "windows-service", "x509-parser", ] @@ -2583,6 +2584,17 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-service" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24d6bcc7f734a4091ecf8d7a64c5f7d7066f45585c1861eba06449909609c8a" +dependencies = [ + "bitflags", + "widestring", + "windows-sys 0.52.0", +] + [[package]] name = "windows-strings" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index 0b13af2..3b3234f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,9 @@ rustls-pemfile = "2.2.0" qrcode = { version = "0.14", default-features = false, features = ["svg"] } webpki-roots = "1" +[target.'cfg(windows)'.dependencies] +windows-service = "0.7" + [dev-dependencies] criterion = { version = "0.8", features = ["html_reports"] } tower = { version = "0.5", features = ["util"] } diff --git a/src/lib.rs b/src/lib.rs index 8933e2a..346c739 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,6 +28,9 @@ pub mod system_dns; pub mod tls; pub mod wire; +#[cfg(windows)] +pub mod windows_service; + #[cfg(test)] pub(crate) mod testutil; diff --git a/src/main.rs b/src/main.rs index bce7add..0459005 100644 --- a/src/main.rs +++ b/src/main.rs @@ -32,6 +32,14 @@ async fn main() -> numa::Result<()> { // Handle CLI subcommands let arg1 = std::env::args().nth(1).unwrap_or_default(); match arg1.as_str() { + #[cfg(windows)] + "--service" => { + // Entry point used by Windows SCM (`sc create … binPath="numa.exe --service"`). + // Hands control to the service dispatcher and blocks until Stop. + numa::windows_service::run_as_service() + .map_err(|e| format!("windows service dispatcher failed: {}", e))?; + return Ok(()); + } "install" => { eprintln!("\x1b[1;38;2;192;98;58mNuma\x1b[0m — installing\n"); return install_service().map_err(|e| e.into()); diff --git a/src/windows_service.rs b/src/windows_service.rs new file mode 100644 index 0000000..8751f23 --- /dev/null +++ b/src/windows_service.rs @@ -0,0 +1,85 @@ +//! Windows service wrapper. +//! +//! Lets the `numa.exe` binary act as a real Windows service registered with +//! the Service Control Manager (SCM). Invoked via `numa.exe --service` (the +//! form that `sc create … binPath=` uses). +//! +//! Interactive runs (`numa.exe`, `numa.exe run`, `numa.exe install`) do not +//! go through this module — they keep their existing console-attached +//! behaviour. + +use std::ffi::OsString; +use std::sync::mpsc; +use std::time::Duration; + +use windows_service::service::{ + ServiceControl, ServiceControlAccept, ServiceExitCode, ServiceState, ServiceStatus, ServiceType, +}; +use windows_service::service_control_handler::{self, ServiceControlHandlerResult}; +use windows_service::{define_windows_service, service_dispatcher}; + +pub const SERVICE_NAME: &str = "Numa"; + +define_windows_service!(ffi_service_main, service_main); + +/// Entry point the SCM hands control to after `StartServiceCtrlDispatcherW`. +/// Any panic here vanishes silently into the service host — log instead of +/// unwrapping. +fn service_main(_arguments: Vec) { + if let Err(e) = run_service() { + log::error!("numa service exited with error: {:?}", e); + } +} + +fn run_service() -> windows_service::Result<()> { + let (shutdown_tx, shutdown_rx) = mpsc::channel::<()>(); + + let event_handler = move |control_event| -> ServiceControlHandlerResult { + match control_event { + ServiceControl::Stop | ServiceControl::Shutdown => { + let _ = shutdown_tx.send(()); + ServiceControlHandlerResult::NoError + } + ServiceControl::Interrogate => ServiceControlHandlerResult::NoError, + _ => ServiceControlHandlerResult::NotImplemented, + } + }; + + let status_handle = service_control_handler::register(SERVICE_NAME, event_handler)?; + + status_handle.set_service_status(ServiceStatus { + service_type: ServiceType::OWN_PROCESS, + current_state: ServiceState::Running, + controls_accepted: ServiceControlAccept::STOP | ServiceControlAccept::SHUTDOWN, + exit_code: ServiceExitCode::Win32(0), + checkpoint: 0, + wait_hint: Duration::default(), + process_id: None, + })?; + + // TODO(windows-service): call numa's async serve loop here once main.rs's + // server body is extracted into `numa::serve(config_path)`. For now the + // service registers, reports Running, and blocks until SCM sends Stop — + // useful for verifying the SCM plumbing end to end with `sc start Numa` + // and `sc stop Numa`. + let _ = shutdown_rx.recv(); + + status_handle.set_service_status(ServiceStatus { + service_type: ServiceType::OWN_PROCESS, + current_state: ServiceState::Stopped, + controls_accepted: ServiceControlAccept::empty(), + exit_code: ServiceExitCode::Win32(0), + checkpoint: 0, + wait_hint: Duration::default(), + process_id: None, + })?; + + Ok(()) +} + +/// Hand control to the SCM dispatcher. Blocks until the service stops. +/// Call only from the `--service` command path — interactive invocations +/// will hang here waiting for an SCM that isn't talking to them. +pub fn run_as_service() -> windows_service::Result<()> { + service_dispatcher::start(SERVICE_NAME, ffi_service_main) +} -- 2.34.1 From b610160cd1855fc79770661a3ce457859073698f Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 15 Apr 2026 22:24:23 +0300 Subject: [PATCH 065/139] feat(windows): run numa as a real SCM service, drop Run-key autostart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hooks the service-dispatcher scaffolding from the previous commit to actually serve DNS, and replaces the HKLM\…\Run login-time autostart with a proper Windows service created via sc.exe. **Refactor** - Extract main.rs's inline server body (~500 lines) into `numa::serve::run` so both the interactive CLI entry and the service dispatcher drive the same startup/serve loop. main.rs is now a thin subcommand router. - main.rs goes sync (no #[tokio::main]); each branch that needs async builds its own runtime and block_on's. Required so the --service path can hand off to SCM without fighting tokio for the entry thread. **Windows service wrapper** - `numa::windows_service::run_service` now builds a multi-thread tokio runtime on a dedicated thread and runs `serve::run` inside it. Stop/ Shutdown from SCM aborts the wait loop and reports SERVICE_STOPPED. - Config path resolves to `%PROGRAMDATA%\numa\numa.toml` when running under SCM (SYSTEM's cwd is System32, relative paths don't work). **Install/uninstall** - `install_windows` now copies numa.exe to a stable `%PROGRAMDATA%\numa\bin\numa.exe` and registers it via `sc create` with start=auto, obj=LocalSystem, and a failure policy of restart/5000/restart/5000/restart/10000. Starts the service immediately when no reboot is pending. - `uninstall_windows` stops + deletes the service and removes the binary copy before restoring DNS. - Drops the old `register_autostart` / `remove_autostart` helpers that wrote to `HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Run` — that path runs at user login in the user's session with no stderr capture and no crash-restart policy, which is why we've been flying blind in every Windows debug session. DNS-set bugs (netsh destructive static, IPv6 not touched, uninstall secondary-drop) and file logging are orthogonal — tracked for follow-up. --- src/lib.rs | 1 + src/main.rs | 654 +---------------------------------------- src/serve.rs | 646 ++++++++++++++++++++++++++++++++++++++++ src/system_dns.rs | 185 ++++++++++-- src/windows_service.rs | 59 +++- 5 files changed, 868 insertions(+), 677 deletions(-) create mode 100644 src/serve.rs diff --git a/src/lib.rs b/src/lib.rs index 346c739..0370c37 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,6 +20,7 @@ pub mod query_log; pub mod question; pub mod record; pub mod recursive; +pub mod serve; pub mod service_store; pub mod setup_phone; pub mod srtt; diff --git a/src/main.rs b/src/main.rs index 0459005..88f2128 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,30 +1,6 @@ -use std::net::SocketAddr; -use std::sync::{Arc, Mutex, RwLock}; -use std::time::Duration; +use numa::system_dns::{install_service, restart_service, service_status, uninstall_service}; -use arc_swap::ArcSwap; -use log::{error, info}; -use tokio::net::UdpSocket; - -use numa::blocklist::{download_blocklists, parse_blocklist, BlocklistStore}; -use numa::buffer::BytePacketBuffer; -use numa::cache::DnsCache; -use numa::config::{build_zone_map, load_config, ConfigLoad}; -use numa::ctx::{handle_query, ServerCtx}; -use numa::forward::{parse_upstream, Upstream, UpstreamPool}; -use numa::override_store::OverrideStore; -use numa::query_log::QueryLog; -use numa::service_store::ServiceStore; -use numa::stats::{ServerStats, Transport}; -use numa::system_dns::{ - discover_system_dns, install_service, restart_service, service_status, uninstall_service, -}; - -const QUAD9_IP: &str = "9.9.9.9"; -const DOH_FALLBACK: &str = "https://9.9.9.9/dns-query"; - -#[tokio::main] -async fn main() -> numa::Result<()> { +fn main() -> numa::Result<()> { env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")) .format_timestamp_millis() .init(); @@ -35,7 +11,7 @@ async fn main() -> numa::Result<()> { #[cfg(windows)] "--service" => { // Entry point used by Windows SCM (`sc create … binPath="numa.exe --service"`). - // Hands control to the service dispatcher and blocks until Stop. + // Blocks until SCM sends Stop; never returns normally. numa::windows_service::run_as_service() .map_err(|e| format!("windows service dispatcher failed: {}", e))?; return Ok(()); @@ -63,7 +39,12 @@ async fn main() -> numa::Result<()> { }; } "setup-phone" => { - return numa::setup_phone::run().await.map_err(|e| e.into()); + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build()?; + return runtime + .block_on(numa::setup_phone::run()) + .map_err(|e| e.into()); } "lan" => { let sub = std::env::args().nth(2).unwrap_or_default(); @@ -126,552 +107,11 @@ async fn main() -> numa::Result<()> { } else { arg1 // treat as config path for backwards compatibility }; - let ConfigLoad { - config, - path: resolved_config_path, - found: config_found, - } = load_config(&config_path)?; - // Discover system DNS in a single pass (upstream + forwarding rules) - let system_dns = discover_system_dns(); - - let root_hints = numa::recursive::parse_root_hints(&config.upstream.root_hints); - - let recursive_pool = || { - let dummy = UpstreamPool::new(vec![Upstream::Udp("0.0.0.0:0".parse().unwrap())], vec![]); - (dummy, "recursive (root hints)".to_string()) - }; - - let (resolved_mode, upstream_auto, pool, upstream_label) = match config.upstream.mode { - numa::config::UpstreamMode::Auto => { - info!("auto mode: probing recursive resolution..."); - if numa::recursive::probe_recursive(&root_hints).await { - info!("recursive probe succeeded — self-sovereign mode"); - let (pool, label) = recursive_pool(); - (numa::config::UpstreamMode::Recursive, false, pool, label) - } else { - log::warn!("recursive probe failed — falling back to Quad9 DoH"); - let client = reqwest::Client::builder() - .use_rustls_tls() - .build() - .unwrap_or_default(); - let url = DOH_FALLBACK.to_string(); - let label = url.clone(); - let pool = UpstreamPool::new(vec![Upstream::Doh { url, client }], vec![]); - (numa::config::UpstreamMode::Forward, false, pool, label) - } - } - numa::config::UpstreamMode::Recursive => { - let (pool, label) = recursive_pool(); - (numa::config::UpstreamMode::Recursive, false, pool, label) - } - numa::config::UpstreamMode::Forward => { - let addrs = if config.upstream.address.is_empty() { - let detected = system_dns - .default_upstream - .or_else(numa::system_dns::detect_dhcp_dns) - .unwrap_or_else(|| { - info!("could not detect system DNS, falling back to Quad9 DoH"); - DOH_FALLBACK.to_string() - }); - vec![detected] - } else { - config.upstream.address.clone() - }; - - let primary: Vec = addrs - .iter() - .map(|s| parse_upstream(s, config.upstream.port)) - .collect::>>()?; - let fallback: Vec = config - .upstream - .fallback - .iter() - .map(|s| parse_upstream(s, config.upstream.port)) - .collect::>>()?; - - let pool = UpstreamPool::new(primary, fallback); - let label = pool.label(); - ( - numa::config::UpstreamMode::Forward, - config.upstream.address.is_empty(), - pool, - label, - ) - } - }; - let api_port = config.server.api_port; - - let mut blocklist = BlocklistStore::new(); - for domain in &config.blocking.allowlist { - blocklist.add_to_allowlist(domain); - } - if !config.blocking.enabled { - blocklist.set_enabled(false); - } - - // Build service store: config services + persisted user services - let mut service_store = ServiceStore::new(); - service_store.insert_from_config("numa", config.server.api_port, Vec::new()); - for svc in &config.services { - service_store.insert_from_config(&svc.name, svc.target_port, svc.routes.clone()); - } - service_store.load_persisted(); - - for fwd in &config.forwarding { - for suffix in &fwd.suffix { - info!("forwarding .{} to {} (config rule)", suffix, fwd.upstream); - } - } - let forwarding_rules = - numa::config::merge_forwarding_rules(&config.forwarding, system_dns.forwarding_rules)?; - - // Resolve data_dir from config, falling back to the platform default. - // Used for TLS CA storage below and stored on ServerCtx for runtime use. - let resolved_data_dir = config - .server - .data_dir - .clone() - .unwrap_or_else(numa::data_dir); - - // Build initial TLS config before ServerCtx (so ArcSwap is ready at construction) - let initial_tls = if config.proxy.enabled && config.proxy.tls_port > 0 { - let service_names = service_store.names(); - match numa::tls::build_tls_config( - &config.proxy.tld, - &service_names, - Vec::new(), - &resolved_data_dir, - ) { - Ok(tls_config) => Some(ArcSwap::from(tls_config)), - Err(e) => { - if let Some(advisory) = numa::tls::try_data_dir_advisory(&e, &resolved_data_dir) { - eprint!("{}", advisory); - } else { - log::warn!("TLS setup failed, HTTPS proxy disabled: {}", e); - } - None - } - } - } else { - None - }; - - let doh_enabled = initial_tls.is_some(); - let health_meta = numa::health::HealthMeta::build( - &resolved_data_dir, - config.dot.enabled, - config.dot.port, - config.mobile.port, - config.dnssec.enabled, - resolved_mode == numa::config::UpstreamMode::Recursive, - config.lan.enabled, - config.blocking.enabled, - doh_enabled, - ); - - let ca_pem = std::fs::read_to_string(resolved_data_dir.join("ca.pem")).ok(); - - let socket = match UdpSocket::bind(&config.server.bind_addr).await { - Ok(s) => s, - Err(e) => { - if let Some(advisory) = - numa::system_dns::try_port53_advisory(&config.server.bind_addr, &e) - { - eprint!("{}", advisory); - std::process::exit(1); - } - return Err(e.into()); - } - }; - - let ctx = Arc::new(ServerCtx { - socket, - zone_map: build_zone_map(&config.zones)?, - cache: RwLock::new(DnsCache::new( - config.cache.max_entries, - config.cache.min_ttl, - config.cache.max_ttl, - )), - refreshing: Mutex::new(std::collections::HashSet::new()), - stats: Mutex::new(ServerStats::new()), - overrides: RwLock::new(OverrideStore::new()), - blocklist: RwLock::new(blocklist), - query_log: Mutex::new(QueryLog::new(1000)), - services: Mutex::new(service_store), - lan_peers: Mutex::new(numa::lan::PeerStore::new(config.lan.peer_timeout_secs)), - forwarding_rules, - upstream_pool: Mutex::new(pool), - upstream_auto, - upstream_port: config.upstream.port, - lan_ip: Mutex::new(numa::lan::detect_lan_ip().unwrap_or(std::net::Ipv4Addr::LOCALHOST)), - timeout: Duration::from_millis(config.upstream.timeout_ms), - hedge_delay: Duration::from_millis(config.upstream.hedge_ms), - proxy_tld_suffix: if config.proxy.tld.is_empty() { - String::new() - } else { - format!(".{}", config.proxy.tld) - }, - proxy_tld: config.proxy.tld.clone(), - lan_enabled: config.lan.enabled, - config_path: resolved_config_path, - config_found, - config_dir: numa::config_dir(), - data_dir: resolved_data_dir, - tls_config: initial_tls, - upstream_mode: resolved_mode, - root_hints, - srtt: std::sync::RwLock::new(numa::srtt::SrttCache::new(config.upstream.srtt)), - inflight: std::sync::Mutex::new(std::collections::HashMap::new()), - dnssec_enabled: config.dnssec.enabled, - dnssec_strict: config.dnssec.strict, - health_meta, - ca_pem, - mobile_enabled: config.mobile.enabled, - mobile_port: config.mobile.port, - }); - - let zone_count: usize = ctx.zone_map.values().map(|m| m.len()).sum(); - // Build banner rows, then size the box to fit the longest value - let api_url = format!("http://localhost:{}", api_port); - let proxy_label = if config.proxy.enabled { - if config.proxy.tls_port > 0 { - Some(format!( - "http://:{} https://:{}", - config.proxy.port, config.proxy.tls_port - )) - } else { - Some(format!( - "http://*.{} on :{}", - config.proxy.tld, config.proxy.port - )) - } - } else { - None - }; - let config_label = if ctx.config_found { - ctx.config_path.clone() - } else { - format!("{} (defaults)", ctx.config_path) - }; - let data_label = ctx.data_dir.display().to_string(); - let services_label = ctx.config_dir.join("services.json").display().to_string(); - - // label (10) + value + padding (2) = inner width; minimum 40 for the title row - let val_w = [ - config.server.bind_addr.len(), - api_url.len(), - upstream_label.len(), - config_label.len(), - data_label.len(), - services_label.len(), - ] - .into_iter() - .chain(proxy_label.as_ref().map(|s| s.len())) - .max() - .unwrap_or(30); - let w = (val_w + 12).max(42); // 10 label + 2 padding, min 42 for title - - let o = "\x1b[38;2;192;98;58m"; // orange - let g = "\x1b[38;2;107;124;78m"; // green - let d = "\x1b[38;2;163;152;136m"; // dim - let r = "\x1b[0m"; // reset - let b = "\x1b[1;38;2;192;98;58m"; // bold orange - let it = "\x1b[3;38;2;163;152;136m"; // italic dim - - let bar_top = "═".repeat(w); - let bar_mid = "─".repeat(w); - let row = |label: &str, color: &str, value: &str| { - eprintln!( - "{o} ║{r} {color}{:<9}{r} {: 0 && ctx.tls_config.is_some() { - let proxy_ctx = Arc::clone(&ctx); - let tls_port = config.proxy.tls_port; - tokio::spawn(async move { - numa::proxy::start_proxy_tls(proxy_ctx, tls_port, proxy_bind).await; - }); - } - - // Spawn network change watcher (upstream re-detection, LAN IP update, peer flush) - { - let watch_ctx = Arc::clone(&ctx); - tokio::spawn(async move { - network_watch_loop(watch_ctx).await; - }); - } - - // Spawn LAN service discovery - if config.lan.enabled { - let lan_ctx = Arc::clone(&ctx); - let lan_config = config.lan.clone(); - tokio::spawn(async move { - numa::lan::start_lan_discovery(lan_ctx, &lan_config).await; - }); - } - - // Spawn DNS-over-TLS listener (RFC 7858) - if config.dot.enabled { - let dot_ctx = Arc::clone(&ctx); - let dot_config = config.dot.clone(); - tokio::spawn(async move { - numa::dot::start_dot(dot_ctx, &dot_config).await; - }); - } - - // UDP DNS listener - #[allow(clippy::infinite_loop)] - loop { - let mut buffer = BytePacketBuffer::new(); - let (len, src_addr) = match ctx.socket.recv_from(&mut buffer.buf).await { - Ok(r) => r, - Err(e) if e.kind() == std::io::ErrorKind::ConnectionReset => { - // Windows delivers ICMP port-unreachable as ConnectionReset on UDP sockets - continue; - } - Err(e) => return Err(e.into()), - }; - let ctx = Arc::clone(&ctx); - tokio::spawn(async move { - if let Err(e) = handle_query(buffer, len, src_addr, &ctx, Transport::Udp).await { - error!("{} | HANDLER ERROR | {}", src_addr, e); - } - }); - } -} - -async fn network_watch_loop(ctx: Arc) { - let mut tick: u64 = 0; - - let mut interval = tokio::time::interval(Duration::from_secs(5)); - interval.tick().await; // skip immediate tick - - loop { - interval.tick().await; - tick += 1; - let mut changed = false; - - // Check LAN IP change (every 5s — cheap, one UDP socket call) - if let Some(new_ip) = numa::lan::detect_lan_ip() { - let mut current_ip = ctx.lan_ip.lock().unwrap(); - if new_ip != *current_ip { - info!("LAN IP changed: {} → {}", current_ip, new_ip); - *current_ip = new_ip; - changed = true; - numa::recursive::reset_udp_state(); - } - } - - // Re-detect upstream every 30s or on LAN IP change (auto-detect only) - if ctx.upstream_auto && (changed || tick.is_multiple_of(6)) { - let dns_info = numa::system_dns::discover_system_dns(); - let new_addr = dns_info - .default_upstream - .or_else(numa::system_dns::detect_dhcp_dns) - .unwrap_or_else(|| QUAD9_IP.to_string()); - let mut pool = ctx.upstream_pool.lock().unwrap(); - if pool.maybe_update_primary(&new_addr, ctx.upstream_port) { - info!("upstream changed → {}", pool.label()); - changed = true; - } - } - - // Flush stale LAN peers on any network change - if changed { - ctx.lan_peers.lock().unwrap().clear(); - info!("flushed LAN peers after network change"); - } - - // Re-probe UDP every 5 minutes when disabled - if tick.is_multiple_of(60) { - numa::recursive::probe_udp(&ctx.root_hints).await; - } - } + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()?; + runtime.block_on(numa::serve::run(config_path)) } fn set_lan_enabled(enabled: bool, path: &str) -> numa::Result<()> { @@ -738,71 +178,3 @@ fn print_lan_status(enabled: bool) { eprintln!(" Restart Numa to start mDNS discovery"); } } - -async fn load_blocklists(ctx: &ServerCtx, lists: &[String]) { - let downloaded = download_blocklists(lists).await; - - // Parse outside the lock to avoid blocking DNS queries during parse (~100ms) - let mut all_domains = std::collections::HashSet::new(); - let mut sources = Vec::new(); - for (source, text) in &downloaded { - let domains = parse_blocklist(text); - info!("blocklist: {} domains from {}", domains.len(), source); - all_domains.extend(domains); - sources.push(source.clone()); - } - let total = all_domains.len(); - - // Swap under lock — sub-microsecond - ctx.blocklist - .write() - .unwrap() - .swap_domains(all_domains, sources); - info!( - "blocking enabled: {} unique domains from {} lists", - total, - downloaded.len() - ); -} - -async fn warm_domain(ctx: &ServerCtx, domain: &str) { - for qtype in [ - numa::question::QueryType::A, - numa::question::QueryType::AAAA, - ] { - numa::ctx::refresh_entry(ctx, domain, qtype).await; - } -} - -async fn doh_keepalive_loop(ctx: Arc) { - let mut interval = tokio::time::interval(Duration::from_secs(25)); - interval.tick().await; // skip first immediate tick - loop { - interval.tick().await; - let pool = ctx.upstream_pool.lock().unwrap().clone(); - if let Some(upstream) = pool.preferred() { - numa::forward::keepalive_doh(upstream).await; - } - } -} - -async fn cache_warm_loop(ctx: Arc, domains: Vec) { - tokio::time::sleep(Duration::from_secs(2)).await; - - for domain in &domains { - warm_domain(&ctx, domain).await; - } - info!("cache warm: {} domains resolved at startup", domains.len()); - - let mut interval = tokio::time::interval(Duration::from_secs(30)); - interval.tick().await; - loop { - interval.tick().await; - for domain in &domains { - let refresh = ctx.cache.read().unwrap().needs_warm(domain); - if refresh { - warm_domain(&ctx, domain).await; - } - } - } -} diff --git a/src/serve.rs b/src/serve.rs new file mode 100644 index 0000000..db0465b --- /dev/null +++ b/src/serve.rs @@ -0,0 +1,646 @@ +//! The main DNS-server runtime. +//! +//! Extracted from `main.rs` so both the interactive CLI entry and the +//! Windows service dispatcher (`windows_service` module) can drive the +//! same startup/serve loop. + +use std::net::SocketAddr; +use std::sync::{Arc, Mutex, RwLock}; +use std::time::Duration; + +use arc_swap::ArcSwap; +use log::{error, info}; +use tokio::net::UdpSocket; + +use crate::blocklist::{download_blocklists, parse_blocklist, BlocklistStore}; +use crate::buffer::BytePacketBuffer; +use crate::cache::DnsCache; +use crate::config::{build_zone_map, load_config, ConfigLoad}; +use crate::ctx::{handle_query, ServerCtx}; +use crate::forward::{parse_upstream, Upstream, UpstreamPool}; +use crate::override_store::OverrideStore; +use crate::query_log::QueryLog; +use crate::service_store::ServiceStore; +use crate::stats::{ServerStats, Transport}; +use crate::system_dns::discover_system_dns; + +const QUAD9_IP: &str = "9.9.9.9"; +const DOH_FALLBACK: &str = "https://9.9.9.9/dns-query"; + +/// Boot the DNS server and run until the UDP listener errors out. +pub async fn run(config_path: String) -> crate::Result<()> { + let ConfigLoad { + config, + path: resolved_config_path, + found: config_found, + } = load_config(&config_path)?; + + // Discover system DNS in a single pass (upstream + forwarding rules) + let system_dns = discover_system_dns(); + + let root_hints = crate::recursive::parse_root_hints(&config.upstream.root_hints); + + let recursive_pool = || { + let dummy = UpstreamPool::new(vec![Upstream::Udp("0.0.0.0:0".parse().unwrap())], vec![]); + (dummy, "recursive (root hints)".to_string()) + }; + + let (resolved_mode, upstream_auto, pool, upstream_label) = match config.upstream.mode { + crate::config::UpstreamMode::Auto => { + info!("auto mode: probing recursive resolution..."); + if crate::recursive::probe_recursive(&root_hints).await { + info!("recursive probe succeeded — self-sovereign mode"); + let (pool, label) = recursive_pool(); + (crate::config::UpstreamMode::Recursive, false, pool, label) + } else { + log::warn!("recursive probe failed — falling back to Quad9 DoH"); + let client = reqwest::Client::builder() + .use_rustls_tls() + .build() + .unwrap_or_default(); + let url = DOH_FALLBACK.to_string(); + let label = url.clone(); + let pool = UpstreamPool::new(vec![Upstream::Doh { url, client }], vec![]); + (crate::config::UpstreamMode::Forward, false, pool, label) + } + } + crate::config::UpstreamMode::Recursive => { + let (pool, label) = recursive_pool(); + (crate::config::UpstreamMode::Recursive, false, pool, label) + } + crate::config::UpstreamMode::Forward => { + let addrs = if config.upstream.address.is_empty() { + let detected = system_dns + .default_upstream + .or_else(crate::system_dns::detect_dhcp_dns) + .unwrap_or_else(|| { + info!("could not detect system DNS, falling back to Quad9 DoH"); + DOH_FALLBACK.to_string() + }); + vec![detected] + } else { + config.upstream.address.clone() + }; + + let primary: Vec = addrs + .iter() + .map(|s| parse_upstream(s, config.upstream.port)) + .collect::>>()?; + let fallback: Vec = config + .upstream + .fallback + .iter() + .map(|s| parse_upstream(s, config.upstream.port)) + .collect::>>()?; + + let pool = UpstreamPool::new(primary, fallback); + let label = pool.label(); + ( + crate::config::UpstreamMode::Forward, + config.upstream.address.is_empty(), + pool, + label, + ) + } + }; + let api_port = config.server.api_port; + + let mut blocklist = BlocklistStore::new(); + for domain in &config.blocking.allowlist { + blocklist.add_to_allowlist(domain); + } + if !config.blocking.enabled { + blocklist.set_enabled(false); + } + + // Build service store: config services + persisted user services + let mut service_store = ServiceStore::new(); + service_store.insert_from_config("numa", config.server.api_port, Vec::new()); + for svc in &config.services { + service_store.insert_from_config(&svc.name, svc.target_port, svc.routes.clone()); + } + service_store.load_persisted(); + + for fwd in &config.forwarding { + for suffix in &fwd.suffix { + info!("forwarding .{} to {} (config rule)", suffix, fwd.upstream); + } + } + let forwarding_rules = + crate::config::merge_forwarding_rules(&config.forwarding, system_dns.forwarding_rules)?; + + // Resolve data_dir from config, falling back to the platform default. + // Used for TLS CA storage below and stored on ServerCtx for runtime use. + let resolved_data_dir = config + .server + .data_dir + .clone() + .unwrap_or_else(crate::data_dir); + + // Build initial TLS config before ServerCtx (so ArcSwap is ready at construction) + let initial_tls = if config.proxy.enabled && config.proxy.tls_port > 0 { + let service_names = service_store.names(); + match crate::tls::build_tls_config( + &config.proxy.tld, + &service_names, + Vec::new(), + &resolved_data_dir, + ) { + Ok(tls_config) => Some(ArcSwap::from(tls_config)), + Err(e) => { + if let Some(advisory) = crate::tls::try_data_dir_advisory(&e, &resolved_data_dir) { + eprint!("{}", advisory); + } else { + log::warn!("TLS setup failed, HTTPS proxy disabled: {}", e); + } + None + } + } + } else { + None + }; + + let doh_enabled = initial_tls.is_some(); + let health_meta = crate::health::HealthMeta::build( + &resolved_data_dir, + config.dot.enabled, + config.dot.port, + config.mobile.port, + config.dnssec.enabled, + resolved_mode == crate::config::UpstreamMode::Recursive, + config.lan.enabled, + config.blocking.enabled, + doh_enabled, + ); + + let ca_pem = std::fs::read_to_string(resolved_data_dir.join("ca.pem")).ok(); + + let socket = match UdpSocket::bind(&config.server.bind_addr).await { + Ok(s) => s, + Err(e) => { + if let Some(advisory) = + crate::system_dns::try_port53_advisory(&config.server.bind_addr, &e) + { + eprint!("{}", advisory); + std::process::exit(1); + } + return Err(e.into()); + } + }; + + let ctx = Arc::new(ServerCtx { + socket, + zone_map: build_zone_map(&config.zones)?, + cache: RwLock::new(DnsCache::new( + config.cache.max_entries, + config.cache.min_ttl, + config.cache.max_ttl, + )), + refreshing: Mutex::new(std::collections::HashSet::new()), + stats: Mutex::new(ServerStats::new()), + overrides: RwLock::new(OverrideStore::new()), + blocklist: RwLock::new(blocklist), + query_log: Mutex::new(QueryLog::new(1000)), + services: Mutex::new(service_store), + lan_peers: Mutex::new(crate::lan::PeerStore::new(config.lan.peer_timeout_secs)), + forwarding_rules, + upstream_pool: Mutex::new(pool), + upstream_auto, + upstream_port: config.upstream.port, + lan_ip: Mutex::new(crate::lan::detect_lan_ip().unwrap_or(std::net::Ipv4Addr::LOCALHOST)), + timeout: Duration::from_millis(config.upstream.timeout_ms), + hedge_delay: Duration::from_millis(config.upstream.hedge_ms), + proxy_tld_suffix: if config.proxy.tld.is_empty() { + String::new() + } else { + format!(".{}", config.proxy.tld) + }, + proxy_tld: config.proxy.tld.clone(), + lan_enabled: config.lan.enabled, + config_path: resolved_config_path, + config_found, + config_dir: crate::config_dir(), + data_dir: resolved_data_dir, + tls_config: initial_tls, + upstream_mode: resolved_mode, + root_hints, + srtt: std::sync::RwLock::new(crate::srtt::SrttCache::new(config.upstream.srtt)), + inflight: std::sync::Mutex::new(std::collections::HashMap::new()), + dnssec_enabled: config.dnssec.enabled, + dnssec_strict: config.dnssec.strict, + health_meta, + ca_pem, + mobile_enabled: config.mobile.enabled, + mobile_port: config.mobile.port, + }); + + let zone_count: usize = ctx.zone_map.values().map(|m| m.len()).sum(); + // Build banner rows, then size the box to fit the longest value + let api_url = format!("http://localhost:{}", api_port); + let proxy_label = if config.proxy.enabled { + if config.proxy.tls_port > 0 { + Some(format!( + "http://:{} https://:{}", + config.proxy.port, config.proxy.tls_port + )) + } else { + Some(format!( + "http://*.{} on :{}", + config.proxy.tld, config.proxy.port + )) + } + } else { + None + }; + let config_label = if ctx.config_found { + ctx.config_path.clone() + } else { + format!("{} (defaults)", ctx.config_path) + }; + let data_label = ctx.data_dir.display().to_string(); + let services_label = ctx.config_dir.join("services.json").display().to_string(); + + // label (10) + value + padding (2) = inner width; minimum 40 for the title row + let val_w = [ + config.server.bind_addr.len(), + api_url.len(), + upstream_label.len(), + config_label.len(), + data_label.len(), + services_label.len(), + ] + .into_iter() + .chain(proxy_label.as_ref().map(|s| s.len())) + .max() + .unwrap_or(30); + let w = (val_w + 12).max(42); // 10 label + 2 padding, min 42 for title + + let o = "\x1b[38;2;192;98;58m"; // orange + let g = "\x1b[38;2;107;124;78m"; // green + let d = "\x1b[38;2;163;152;136m"; // dim + let r = "\x1b[0m"; // reset + let b = "\x1b[1;38;2;192;98;58m"; // bold orange + let it = "\x1b[3;38;2;163;152;136m"; // italic dim + + let bar_top = "═".repeat(w); + let bar_mid = "─".repeat(w); + let row = |label: &str, color: &str, value: &str| { + eprintln!( + "{o} ║{r} {color}{:<9}{r} {: 0 && ctx.tls_config.is_some() { + let proxy_ctx = Arc::clone(&ctx); + let tls_port = config.proxy.tls_port; + tokio::spawn(async move { + crate::proxy::start_proxy_tls(proxy_ctx, tls_port, proxy_bind).await; + }); + } + + // Spawn network change watcher (upstream re-detection, LAN IP update, peer flush) + { + let watch_ctx = Arc::clone(&ctx); + tokio::spawn(async move { + network_watch_loop(watch_ctx).await; + }); + } + + // Spawn LAN service discovery + if config.lan.enabled { + let lan_ctx = Arc::clone(&ctx); + let lan_config = config.lan.clone(); + tokio::spawn(async move { + crate::lan::start_lan_discovery(lan_ctx, &lan_config).await; + }); + } + + // Spawn DNS-over-TLS listener (RFC 7858) + if config.dot.enabled { + let dot_ctx = Arc::clone(&ctx); + let dot_config = config.dot.clone(); + tokio::spawn(async move { + crate::dot::start_dot(dot_ctx, &dot_config).await; + }); + } + + // UDP DNS listener + #[allow(clippy::infinite_loop)] + loop { + let mut buffer = BytePacketBuffer::new(); + let (len, src_addr) = match ctx.socket.recv_from(&mut buffer.buf).await { + Ok(r) => r, + Err(e) if e.kind() == std::io::ErrorKind::ConnectionReset => { + // Windows delivers ICMP port-unreachable as ConnectionReset on UDP sockets + continue; + } + Err(e) => return Err(e.into()), + }; + let ctx = Arc::clone(&ctx); + tokio::spawn(async move { + if let Err(e) = handle_query(buffer, len, src_addr, &ctx, Transport::Udp).await { + error!("{} | HANDLER ERROR | {}", src_addr, e); + } + }); + } +} + +async fn network_watch_loop(ctx: Arc) { + let mut tick: u64 = 0; + + let mut interval = tokio::time::interval(Duration::from_secs(5)); + interval.tick().await; // skip immediate tick + + loop { + interval.tick().await; + tick += 1; + let mut changed = false; + + // Check LAN IP change (every 5s — cheap, one UDP socket call) + if let Some(new_ip) = crate::lan::detect_lan_ip() { + let mut current_ip = ctx.lan_ip.lock().unwrap(); + if new_ip != *current_ip { + info!("LAN IP changed: {} → {}", current_ip, new_ip); + *current_ip = new_ip; + changed = true; + crate::recursive::reset_udp_state(); + } + } + + // Re-detect upstream every 30s or on LAN IP change (auto-detect only) + if ctx.upstream_auto && (changed || tick.is_multiple_of(6)) { + let dns_info = crate::system_dns::discover_system_dns(); + let new_addr = dns_info + .default_upstream + .or_else(crate::system_dns::detect_dhcp_dns) + .unwrap_or_else(|| QUAD9_IP.to_string()); + let mut pool = ctx.upstream_pool.lock().unwrap(); + if pool.maybe_update_primary(&new_addr, ctx.upstream_port) { + info!("upstream changed → {}", pool.label()); + changed = true; + } + } + + // Flush stale LAN peers on any network change + if changed { + ctx.lan_peers.lock().unwrap().clear(); + info!("flushed LAN peers after network change"); + } + + // Re-probe UDP every 5 minutes when disabled + if tick.is_multiple_of(60) { + crate::recursive::probe_udp(&ctx.root_hints).await; + } + } +} + +async fn load_blocklists(ctx: &ServerCtx, lists: &[String]) { + let downloaded = download_blocklists(lists).await; + + // Parse outside the lock to avoid blocking DNS queries during parse (~100ms) + let mut all_domains = std::collections::HashSet::new(); + let mut sources = Vec::new(); + for (source, text) in &downloaded { + let domains = parse_blocklist(text); + info!("blocklist: {} domains from {}", domains.len(), source); + all_domains.extend(domains); + sources.push(source.clone()); + } + let total = all_domains.len(); + + // Swap under lock — sub-microsecond + ctx.blocklist + .write() + .unwrap() + .swap_domains(all_domains, sources); + info!( + "blocking enabled: {} unique domains from {} lists", + total, + downloaded.len() + ); +} + +async fn warm_domain(ctx: &ServerCtx, domain: &str) { + for qtype in [ + crate::question::QueryType::A, + crate::question::QueryType::AAAA, + ] { + crate::ctx::refresh_entry(ctx, domain, qtype).await; + } +} + +async fn doh_keepalive_loop(ctx: Arc) { + let mut interval = tokio::time::interval(Duration::from_secs(25)); + interval.tick().await; // skip first immediate tick + loop { + interval.tick().await; + let pool = ctx.upstream_pool.lock().unwrap().clone(); + if let Some(upstream) = pool.preferred() { + crate::forward::keepalive_doh(upstream).await; + } + } +} + +async fn cache_warm_loop(ctx: Arc, domains: Vec) { + tokio::time::sleep(Duration::from_secs(2)).await; + + for domain in &domains { + warm_domain(&ctx, domain).await; + } + info!("cache warm: {} domains resolved at startup", domains.len()); + + let mut interval = tokio::time::interval(Duration::from_secs(30)); + interval.tick().await; + loop { + interval.tick().await; + for domain in &domains { + let refresh = ctx.cache.read().unwrap().needs_warm(domain); + if refresh { + warm_domain(&ctx, domain).await; + } + } + } +} diff --git a/src/system_dns.rs b/src/system_dns.rs index 96ae372..b39f661 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -697,7 +697,23 @@ fn install_windows() -> Result<(), String> { } let needs_reboot = disable_dnscache()?; - register_autostart(); + + // Copy the binary to a stable path under ProgramData and register it + // as a real Windows service (SCM-managed, boot-time, auto-restart). + let service_exe = install_service_binary()?; + register_service_scm(&service_exe)?; + + // If no reboot is pending (Dnscache wasn't running, port 53 free), + // start the service immediately. Otherwise it'll launch on next boot. + if !needs_reboot { + match start_service_scm() { + Ok(_) => eprintln!(" Service started."), + Err(e) => eprintln!( + " warning: service registered but could not start now: {}", + e + ), + } + } eprintln!(); if !has_useful_existing { @@ -707,51 +723,160 @@ fn install_windows() -> Result<(), String> { if needs_reboot { eprintln!(" *** Reboot required. Numa will start automatically. ***\n"); } else { - eprintln!(" Numa will start automatically on next boot.\n"); + eprintln!(" Numa is running.\n"); } print_recursive_hint(); Ok(()) } -/// Register numa to auto-start on boot via registry Run key. #[cfg(windows)] -fn register_autostart() { - let exe = std::env::current_exe() - .map(|p| p.to_string_lossy().to_string()) - .unwrap_or_else(|_| "numa".into()); - let _ = std::process::Command::new("reg") - .args([ - "add", - "HKLM\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run", - "/v", - "Numa", - "/t", - "REG_SZ", - "/d", - &exe, - "/f", - ]) - .status(); - eprintln!(" Registered auto-start on boot."); +const WINDOWS_SERVICE_NAME: &str = "Numa"; + +/// Stable install location for the service binary. SCM keeps a handle to +/// this path; the user's Downloads folder (where `current_exe()` points at +/// install time) is not durable. +#[cfg(windows)] +fn windows_service_exe_path() -> std::path::PathBuf { + std::path::PathBuf::from( + std::env::var("PROGRAMDATA").unwrap_or_else(|_| "C:\\ProgramData".into()), + ) + .join("numa") + .join("bin") + .join("numa.exe") } -/// Remove numa auto-start registry key. +/// Copy the currently-running binary to the service install location. SCM +/// keeps a handle to this path, so it must be stable across user sessions. #[cfg(windows)] -fn remove_autostart() { - let _ = std::process::Command::new("reg") +fn install_service_binary() -> Result { + let src = std::env::current_exe().map_err(|e| format!("current_exe(): {}", e))?; + let dst = windows_service_exe_path(); + if let Some(parent) = dst.parent() { + std::fs::create_dir_all(parent) + .map_err(|e| format!("failed to create {}: {}", parent.display(), e))?; + } + // Copy only if source and destination differ; running the binary from + // its install location is a supported (re-install) case. + if src != dst { + std::fs::copy(&src, &dst).map_err(|e| { + format!( + "failed to copy {} -> {}: {}", + src.display(), + dst.display(), + e + ) + })?; + } + Ok(dst) +} + +/// Remove the service binary on uninstall. Ignore failures — the service +/// is already deleted; a leftover file in ProgramData is not a hard error. +#[cfg(windows)] +fn remove_service_binary() { + let _ = std::fs::remove_file(windows_service_exe_path()); +} + +/// Register numa with the Service Control Manager, boot-time auto-start, +/// LocalSystem context, with a failure policy of restart-after-5s. +#[cfg(windows)] +fn register_service_scm(exe: &std::path::Path) -> Result<(), String> { + let bin_path = format!("\"{}\" --service", exe.display()); + + // sc.exe uses a leading space as its `name= value` delimiter; the space + // after `=` is mandatory. + let create = std::process::Command::new("sc") .args([ - "delete", - "HKLM\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run", - "/v", - "Numa", - "/f", + "create", + WINDOWS_SERVICE_NAME, + "binPath=", + &bin_path, + "DisplayName=", + "Numa DNS", + "start=", + "auto", + "obj=", + "LocalSystem", ]) + .output() + .map_err(|e| format!("failed to run sc create: {}", e))?; + if !create.status.success() { + let out = String::from_utf8_lossy(&create.stdout); + // "service already exists" is 1073 — treat as idempotent success. + if !out.contains("1073") { + return Err(format!("sc create failed: {}", out.trim())); + } + } + + let _ = std::process::Command::new("sc") + .args([ + "description", + WINDOWS_SERVICE_NAME, + "Self-sovereign DNS resolver (ad blocking, DoH/DoT, local zones).", + ]) + .status(); + + // Restart on crash: 5s, 5s, 10s; reset failure counter after 60s. + let _ = std::process::Command::new("sc") + .args([ + "failure", + WINDOWS_SERVICE_NAME, + "reset=", + "60", + "actions=", + "restart/5000/restart/5000/restart/10000", + ]) + .status(); + + eprintln!( + " Registered service '{}' (boot-time).", + WINDOWS_SERVICE_NAME + ); + Ok(()) +} + +/// Start the service. Safe to call on a freshly-registered service — SCM +/// will fail with 1056 ("already running") or 1058 ("disabled") and we +/// return the underlying error string rather than masking it. +#[cfg(windows)] +fn start_service_scm() -> Result<(), String> { + let out = std::process::Command::new("sc") + .args(["start", WINDOWS_SERVICE_NAME]) + .output() + .map_err(|e| format!("failed to run sc start: {}", e))?; + if !out.status.success() { + let text = String::from_utf8_lossy(&out.stdout); + if text.contains("1056") { + return Ok(()); // already running + } + return Err(format!("sc start failed: {}", text.trim())); + } + Ok(()) +} + +/// Stop the service. Returns Ok if already stopped — idempotent. +#[cfg(windows)] +fn stop_service_scm() { + let _ = std::process::Command::new("sc") + .args(["stop", WINDOWS_SERVICE_NAME]) + .status(); +} + +/// Remove the service from SCM. Safe if already absent. +#[cfg(windows)] +fn delete_service_scm() { + let _ = std::process::Command::new("sc") + .args(["delete", WINDOWS_SERVICE_NAME]) .status(); } #[cfg(windows)] fn uninstall_windows() -> Result<(), String> { - remove_autostart(); + // Stop + remove the service before touching DNS, so port 53 is released + // cleanly and the failure-restart policy doesn't resurrect it. + stop_service_scm(); + delete_service_scm(); + remove_service_binary(); let path = windows_backup_path(); let json = std::fs::read_to_string(&path) .map_err(|e| format!("no backup found at {}: {}", path.display(), e))?; diff --git a/src/windows_service.rs b/src/windows_service.rs index 8751f23..c51339c 100644 --- a/src/windows_service.rs +++ b/src/windows_service.rs @@ -57,12 +57,50 @@ fn run_service() -> windows_service::Result<()> { process_id: None, })?; - // TODO(windows-service): call numa's async serve loop here once main.rs's - // server body is extracted into `numa::serve(config_path)`. For now the - // service registers, reports Running, and blocks until SCM sends Stop — - // useful for verifying the SCM plumbing end to end with `sc start Numa` - // and `sc stop Numa`. - let _ = shutdown_rx.recv(); + // Spin up a multi-threaded tokio runtime and run the server on it. A + // dedicated thread runs the runtime so this function can return cleanly + // once the SCM tells us to stop — we can't block the dispatcher thread + // forever without preventing graceful shutdown. + let config_path = service_config_path(); + let (runtime_stop_tx, runtime_stop_rx) = mpsc::channel::<()>(); + + let server_thread = std::thread::spawn(move || { + let runtime = match tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + { + Ok(rt) => rt, + Err(e) => { + log::error!("failed to build tokio runtime: {}", e); + let _ = runtime_stop_tx.send(()); + return; + } + }; + + // block_on returns when serve::run's UDP loop errors out OR when the + // runtime is dropped from another thread. Either signals exit. + if let Err(e) = runtime.block_on(crate::serve::run(config_path)) { + log::error!("numa serve exited with error: {}", e); + } + let _ = runtime_stop_tx.send(()); + }); + + // Wait for either SCM stop or server termination. + loop { + if shutdown_rx.try_recv().is_ok() { + break; + } + if runtime_stop_rx.try_recv().is_ok() { + break; + } + std::thread::sleep(Duration::from_millis(200)); + } + + // The server's tokio runtime runs detached inside server_thread. Abandon + // it — the process is about to report Stopped and the SCM will terminate + // us if we linger. Future work: plumb a cancellation signal into + // serve::run() for a clean teardown of listeners and in-flight queries. + drop(server_thread); status_handle.set_service_status(ServiceStatus { service_type: ServiceType::OWN_PROCESS, @@ -83,3 +121,12 @@ fn run_service() -> windows_service::Result<()> { pub fn run_as_service() -> windows_service::Result<()> { service_dispatcher::start(SERVICE_NAME, ffi_service_main) } + +/// Path to the config file used when running under SCM. SCM launches the +/// service with SYSTEM's working directory (usually `C:\Windows\System32`), +/// so a relative `numa.toml` lookup won't find anything meaningful — use an +/// absolute path under `%PROGRAMDATA%` instead. +fn service_config_path() -> String { + let base = std::env::var("PROGRAMDATA").unwrap_or_else(|_| "C:\\ProgramData".into()); + format!("{}\\numa\\numa.toml", base) +} -- 2.34.1 From 7bb484ada3a6efd7521a44e3cc5bbd9dc7fb9dec Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 15 Apr 2026 23:48:09 +0300 Subject: [PATCH 066/139] refactor(windows): deduplicate after simplify review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Drop the duplicate WINDOWS_SERVICE_NAME constant; call sites use the single source of truth at windows_service::SERVICE_NAME. - windows_service_exe_path and service_config_path now compose from crate::data_dir() instead of re-parsing %PROGRAMDATA% locally. - Factor the 6× sc.exe invocation boilerplate into a run_sc helper. - Replace the 200ms try_recv polling loop in the service dispatcher with a recv_timeout wait — cuts shutdown latency and idle CPU. - stop_service_scm/delete_service_scm now log warnings instead of silently swallowing failures, so unexpected errors are visible. --- src/system_dns.rs | 108 +++++++++++++++++++---------------------- src/windows_service.rs | 22 ++++----- 2 files changed, 61 insertions(+), 69 deletions(-) diff --git a/src/system_dns.rs b/src/system_dns.rs index b39f661..826101d 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -729,20 +729,24 @@ fn install_windows() -> Result<(), String> { Ok(()) } -#[cfg(windows)] -const WINDOWS_SERVICE_NAME: &str = "Numa"; - /// Stable install location for the service binary. SCM keeps a handle to /// this path; the user's Downloads folder (where `current_exe()` points at /// install time) is not durable. #[cfg(windows)] fn windows_service_exe_path() -> std::path::PathBuf { - std::path::PathBuf::from( - std::env::var("PROGRAMDATA").unwrap_or_else(|_| "C:\\ProgramData".into()), - ) - .join("numa") - .join("bin") - .join("numa.exe") + crate::data_dir().join("bin").join("numa.exe") +} + +/// Run `sc.exe` with the given args and return its merged stdout/stderr on +/// failure. `sc` emits errors on stdout (not stderr) on Windows, so the +/// caller reads stdout to format a useful error. +#[cfg(windows)] +fn run_sc(args: &[&str]) -> Result { + let out = std::process::Command::new("sc") + .args(args) + .output() + .map_err(|e| format!("failed to run sc {}: {}", args.first().unwrap_or(&""), e))?; + Ok(out) } /// Copy the currently-running binary to the service install location. SCM @@ -782,24 +786,22 @@ fn remove_service_binary() { #[cfg(windows)] fn register_service_scm(exe: &std::path::Path) -> Result<(), String> { let bin_path = format!("\"{}\" --service", exe.display()); + let name = crate::windows_service::SERVICE_NAME; // sc.exe uses a leading space as its `name= value` delimiter; the space // after `=` is mandatory. - let create = std::process::Command::new("sc") - .args([ - "create", - WINDOWS_SERVICE_NAME, - "binPath=", - &bin_path, - "DisplayName=", - "Numa DNS", - "start=", - "auto", - "obj=", - "LocalSystem", - ]) - .output() - .map_err(|e| format!("failed to run sc create: {}", e))?; + let create = run_sc(&[ + "create", + name, + "binPath=", + &bin_path, + "DisplayName=", + "Numa DNS", + "start=", + "auto", + "obj=", + "LocalSystem", + ])?; if !create.status.success() { let out = String::from_utf8_lossy(&create.stdout); // "service already exists" is 1073 — treat as idempotent success. @@ -808,30 +810,23 @@ fn register_service_scm(exe: &std::path::Path) -> Result<(), String> { } } - let _ = std::process::Command::new("sc") - .args([ - "description", - WINDOWS_SERVICE_NAME, - "Self-sovereign DNS resolver (ad blocking, DoH/DoT, local zones).", - ]) - .status(); + let _ = run_sc(&[ + "description", + name, + "Self-sovereign DNS resolver (ad blocking, DoH/DoT, local zones).", + ]); // Restart on crash: 5s, 5s, 10s; reset failure counter after 60s. - let _ = std::process::Command::new("sc") - .args([ - "failure", - WINDOWS_SERVICE_NAME, - "reset=", - "60", - "actions=", - "restart/5000/restart/5000/restart/10000", - ]) - .status(); + let _ = run_sc(&[ + "failure", + name, + "reset=", + "60", + "actions=", + "restart/5000/restart/5000/restart/10000", + ]); - eprintln!( - " Registered service '{}' (boot-time).", - WINDOWS_SERVICE_NAME - ); + eprintln!(" Registered service '{}' (boot-time).", name); Ok(()) } @@ -840,10 +835,7 @@ fn register_service_scm(exe: &std::path::Path) -> Result<(), String> { /// return the underlying error string rather than masking it. #[cfg(windows)] fn start_service_scm() -> Result<(), String> { - let out = std::process::Command::new("sc") - .args(["start", WINDOWS_SERVICE_NAME]) - .output() - .map_err(|e| format!("failed to run sc start: {}", e))?; + let out = run_sc(&["start", crate::windows_service::SERVICE_NAME])?; if !out.status.success() { let text = String::from_utf8_lossy(&out.stdout); if text.contains("1056") { @@ -854,20 +846,22 @@ fn start_service_scm() -> Result<(), String> { Ok(()) } -/// Stop the service. Returns Ok if already stopped — idempotent. +/// Stop the service. Idempotent — already-stopped or missing service logs +/// a warning but doesn't error, since both callers (install re-run, +/// uninstall) want best-effort cleanup rather than hard failure. #[cfg(windows)] fn stop_service_scm() { - let _ = std::process::Command::new("sc") - .args(["stop", WINDOWS_SERVICE_NAME]) - .status(); + if let Err(e) = run_sc(&["stop", crate::windows_service::SERVICE_NAME]) { + log::warn!("sc stop failed: {}", e); + } } -/// Remove the service from SCM. Safe if already absent. +/// Remove the service from SCM. Idempotent — see `stop_service_scm`. #[cfg(windows)] fn delete_service_scm() { - let _ = std::process::Command::new("sc") - .args(["delete", WINDOWS_SERVICE_NAME]) - .status(); + if let Err(e) = run_sc(&["delete", crate::windows_service::SERVICE_NAME]) { + log::warn!("sc delete failed: {}", e); + } } #[cfg(windows)] diff --git a/src/windows_service.rs b/src/windows_service.rs index c51339c..a1403d7 100644 --- a/src/windows_service.rs +++ b/src/windows_service.rs @@ -62,7 +62,7 @@ fn run_service() -> windows_service::Result<()> { // once the SCM tells us to stop — we can't block the dispatcher thread // forever without preventing graceful shutdown. let config_path = service_config_path(); - let (runtime_stop_tx, runtime_stop_rx) = mpsc::channel::<()>(); + let (server_done_tx, server_done_rx) = mpsc::channel::<()>(); let server_thread = std::thread::spawn(move || { let runtime = match tokio::runtime::Builder::new_multi_thread() @@ -72,28 +72,25 @@ fn run_service() -> windows_service::Result<()> { Ok(rt) => rt, Err(e) => { log::error!("failed to build tokio runtime: {}", e); - let _ = runtime_stop_tx.send(()); + let _ = server_done_tx.send(()); return; } }; - // block_on returns when serve::run's UDP loop errors out OR when the - // runtime is dropped from another thread. Either signals exit. if let Err(e) = runtime.block_on(crate::serve::run(config_path)) { log::error!("numa serve exited with error: {}", e); } - let _ = runtime_stop_tx.send(()); + let _ = server_done_tx.send(()); }); // Wait for either SCM stop or server termination. loop { - if shutdown_rx.try_recv().is_ok() { + if shutdown_rx.recv_timeout(Duration::from_millis(500)).is_ok() { break; } - if runtime_stop_rx.try_recv().is_ok() { + if server_done_rx.try_recv().is_ok() { break; } - std::thread::sleep(Duration::from_millis(200)); } // The server's tokio runtime runs detached inside server_thread. Abandon @@ -124,9 +121,10 @@ pub fn run_as_service() -> windows_service::Result<()> { /// Path to the config file used when running under SCM. SCM launches the /// service with SYSTEM's working directory (usually `C:\Windows\System32`), -/// so a relative `numa.toml` lookup won't find anything meaningful — use an -/// absolute path under `%PROGRAMDATA%` instead. +/// so a relative `numa.toml` lookup won't find anything meaningful. fn service_config_path() -> String { - let base = std::env::var("PROGRAMDATA").unwrap_or_else(|_| "C:\\ProgramData".into()); - format!("{}\\numa\\numa.toml", base) + crate::data_dir() + .join("numa.toml") + .to_string_lossy() + .into_owned() } -- 2.34.1 From cc635f2f73e5fac3f7999f27eb352d6c00c18386 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 16 Apr 2026 06:15:48 +0300 Subject: [PATCH 067/139] feat(dashboard): show version in header, restructure footer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #108. - Add `version` field to /stats (from CARGO_PKG_VERSION). - Show `v0.13.1` next to the Numa wordmark in the dashboard header. - Restructure the footer into two semantic rows: Row 1 (paths): Config · Data · Logs (platform-detected) Row 2 (runtime): Upstream · DNSSEC · SRTT · GitHub - Drop Mode from the footer (redundant with Upstream label). - Show only the matching-platform log path instead of both macOS and Linux unconditionally. --- site/dashboard.html | 19 ++++++++++++------- src/api.rs | 2 ++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/site/dashboard.html b/site/dashboard.html index 77018fc..de286ab 100644 --- a/site/dashboard.html +++ b/site/dashboard.html @@ -561,6 +561,7 @@ body {
      +
      DNS that governs itself
      @@ -1136,16 +1137,20 @@ async function refresh() { document.getElementById('totalQueries').textContent = formatNumber(q.total); document.getElementById('uptime').textContent = formatUptime(stats.uptime_secs); document.getElementById('uptimeSub').textContent = formatUptimeSub(stats.uptime_secs); + document.getElementById('headerVersion').textContent = stats.version ? 'v' + stats.version : ''; document.getElementById('footerUpstream').textContent = stats.upstream || ''; document.getElementById('footerConfig').textContent = stats.config_path || ''; document.getElementById('footerData').textContent = stats.data_dir || ''; - const modeEl = document.getElementById('footerMode'); - modeEl.textContent = stats.mode || '—'; - modeEl.style.color = stats.mode === 'recursive' ? 'var(--emerald)' : 'var(--amber)'; document.getElementById('footerDnssec').textContent = stats.dnssec ? 'on' : 'off'; document.getElementById('footerDnssec').style.color = stats.dnssec ? 'var(--emerald)' : 'var(--text-dim)'; document.getElementById('footerSrtt').textContent = stats.srtt ? 'on' : 'off'; document.getElementById('footerSrtt').style.color = stats.srtt ? 'var(--emerald)' : 'var(--text-dim)'; + if (!document.getElementById('footerLogs').textContent) { + const isMac = stats.data_dir && stats.data_dir.includes('/usr/local/'); + document.getElementById('footerLogs').textContent = isMac + ? '/usr/local/var/log/numa.log' + : 'journalctl -u numa -f'; + } // LAN status indicator const lanEl = document.getElementById('lanToggle'); @@ -1504,14 +1509,14 @@ refresh(); setInterval(refresh, 2000); -
      +
      Config: · Data: - · Upstream: - · Mode: + · Logs: +
      + Upstream: · DNSSEC: · SRTT: - · Logs: macOS: /usr/local/var/log/numa.log · Linux: journalctl -u numa -f · GitHub
      diff --git a/src/api.rs b/src/api.rs index 17c4614..f8b2702 100644 --- a/src/api.rs +++ b/src/api.rs @@ -160,6 +160,7 @@ struct QueryLogResponse { #[derive(Serialize)] struct StatsResponse { + version: &'static str, uptime_secs: u64, upstream: String, mode: &'static str, // "recursive" or "forward" — never "auto" at runtime @@ -539,6 +540,7 @@ async fn stats(State(ctx): State>) -> Json { }; Json(StatsResponse { + version: env!("CARGO_PKG_VERSION"), uptime_secs: snap.uptime_secs, upstream, mode: ctx.upstream_mode.as_str(), -- 2.34.1 From 1c5e703330bab7ca1a822246f346f79677d52863 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 16 Apr 2026 06:39:29 +0300 Subject: [PATCH 068/139] =?UTF-8?q?fix(dashboard):=20collapse=20header=20o?= =?UTF-8?q?n=20mobile=20(=E2=89=A4700px)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hide tagline, version tag, and Phone Setup on narrow viewports so the header stays single-row: logo + status dot + blocking toggle. Reduces logo font-size from 1.8rem to 1.4rem on mobile. --- site/dashboard.html | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/site/dashboard.html b/site/dashboard.html index de286ab..85b6984 100644 --- a/site/dashboard.html +++ b/site/dashboard.html @@ -552,7 +552,11 @@ body { @media (max-width: 700px) { .stats-row { grid-template-columns: repeat(2, 1fr); } .dashboard { padding: 1rem; } - .header { padding: 1rem; } + .header { padding: 0.8rem 1rem; } + .logo { font-size: 1.4rem; } + .tagline { display: none; } + #headerVersion { display: none; } + #phoneSetup { display: none; } } -- 2.34.1 From 0118ab0f442e638274aaa68b32a3470d00fee4cf Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 16 Apr 2026 13:02:25 +0300 Subject: [PATCH 069/139] feat: embed git SHA in version string via build.rs Adds a build.rs that runs `git describe --tags --always --dirty` and sets NUMA_BUILD_VERSION at compile time. A new `numa::version()` helper returns the build version, falling back to CARGO_PKG_VERSION when git is unavailable (source tarballs, Docker builds without .git). Version strings: tagged release: 0.13.1 commits ahead: 0.13.1+a87f907 uncommitted changes: 0.13.1+a87f907-dirty no git: 0.13.1 Replaces all 6 inline env!("CARGO_PKG_VERSION") call sites with the single version() function. --- build.rs | 47 +++++++++++++++++++++++++++++++++++++++++++++++ src/api.rs | 2 +- src/health.rs | 4 ++-- src/lib.rs | 8 ++++++++ src/main.rs | 10 ++++------ 5 files changed, 62 insertions(+), 9 deletions(-) create mode 100644 build.rs diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..e3375af --- /dev/null +++ b/build.rs @@ -0,0 +1,47 @@ +fn main() { + let git_version = std::process::Command::new("git") + .args(["describe", "--tags", "--always", "--dirty"]) + .output() + .ok() + .filter(|o| o.status.success()) + .and_then(|o| String::from_utf8(o.stdout).ok()) + .map(|s| { + let s = s.trim(); + let s = s.strip_prefix('v').unwrap_or(s); + // "0.13.1" → clean tag → "0.13.1" + // "0.13.1-9-ga87f907" → ahead → "0.13.1+a87f907" + // "0.13.1-9-ga87f907-dirty" → dirty → "0.13.1+a87f907-dirty" + // "a87f907" → no tags → "0.0.0+a87f907" + // "a87f907-dirty" → no tags → "0.0.0+a87f907-dirty" + if let Some((base, rest)) = s.split_once("-") { + // Could be "0.13.1-9-ga87f907[-dirty]" or "a87f907-dirty" + if base.contains('.') { + // Tagged: extract sha from "-N-gSHA[-dirty]" + let parts: Vec<&str> = rest.splitn(3, '-').collect(); + match parts.as_slice() { + [_n, sha] => format!("{}+{}", base, sha.strip_prefix('g').unwrap_or(sha)), + [_n, sha, "dirty"] => { + format!("{}+{}-dirty", base, sha.strip_prefix('g').unwrap_or(sha)) + } + _ => s.to_string(), + } + } else { + // Untagged: "sha-dirty" + format!("0.0.0+{}", s) + } + } else if s.contains('.') { + // Exact tag match: "0.13.1" + s.to_string() + } else { + // Bare sha, no tags at all + format!("0.0.0+{}", s) + } + }); + + if let Some(v) = git_version { + println!("cargo:rustc-env=NUMA_BUILD_VERSION={}", v); + } + + println!("cargo:rerun-if-changed=.git/HEAD"); + println!("cargo:rerun-if-changed=.git/refs/tags/"); +} diff --git a/src/api.rs b/src/api.rs index f8b2702..dd1fe78 100644 --- a/src/api.rs +++ b/src/api.rs @@ -540,7 +540,7 @@ async fn stats(State(ctx): State>) -> Json { }; Json(StatsResponse { - version: env!("CARGO_PKG_VERSION"), + version: crate::version(), uptime_secs: snap.uptime_secs, upstream, mode: ctx.upstream_mode.as_str(), diff --git a/src/health.rs b/src/health.rs index e55c569..5767f4b 100644 --- a/src/health.rs +++ b/src/health.rs @@ -43,7 +43,7 @@ impl HealthMeta { #[cfg(test)] pub fn test_fixture() -> Self { HealthMeta { - version: env!("CARGO_PKG_VERSION"), + version: crate::version(), hostname: "test-host".to_string(), sni: "numa.numa".to_string(), dot_enabled: false, @@ -99,7 +99,7 @@ impl HealthMeta { } HealthMeta { - version: env!("CARGO_PKG_VERSION"), + version: crate::version(), hostname: crate::hostname(), sni: "numa.numa".to_string(), dot_enabled, diff --git a/src/lib.rs b/src/lib.rs index 8933e2a..a9d38fc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,6 +34,14 @@ pub(crate) mod testutil; pub type Error = Box; pub type Result = std::result::Result; +/// Build version string. On tagged releases: `0.13.1`. On commits ahead +/// of a tag: `0.13.1+a87f907`. With uncommitted changes: `0.13.1+a87f907-dirty`. +/// Falls back to `CARGO_PKG_VERSION` when built outside a git repo (e.g. +/// from a source tarball). +pub fn version() -> &'static str { + option_env!("NUMA_BUILD_VERSION").unwrap_or(env!("CARGO_PKG_VERSION")) +} + /// Detect the machine hostname via the `hostname` command. Returns the /// full hostname (e.g., `macbook-pro.local`), or `"numa"` if the command /// fails. Call sites that need the short form (e.g., mDNS instance diff --git a/src/main.rs b/src/main.rs index bce7add..faf2e22 100644 --- a/src/main.rs +++ b/src/main.rs @@ -72,7 +72,7 @@ async fn main() -> numa::Result<()> { }; } "version" | "--version" | "-V" => { - eprintln!("numa {}", env!("CARGO_PKG_VERSION")); + eprintln!("numa {}", numa::version()); return Ok(()); } "help" | "--help" | "-h" => { @@ -383,12 +383,10 @@ async fn main() -> numa::Result<()> { }; // Title row: center within the box - let title = format!( - "{b}NUMA{r} {it}DNS that governs itself{r} {d}v{}{r}", - env!("CARGO_PKG_VERSION") - ); + let ver = numa::version(); + let title = format!("{b}NUMA{r} {it}DNS that governs itself{r} {d}v{ver}{r}",); // The title contains ANSI codes; visible length is ~38 chars. Pad to fill the box. - let title_visible_len = 4 + 2 + 24 + 2 + 1 + env!("CARGO_PKG_VERSION").len() + 1; + let title_visible_len = 4 + 2 + 24 + 2 + 1 + ver.len() + 1; let title_pad = w.saturating_sub(title_visible_len); eprintln!("\n{o} ╔{bar_top}╗{r}"); eprint!("{o} ║{r} {title}"); -- 2.34.1 From 30bb7365c9b2f0faa0e2456b4b6ac04f84109d1f Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 16 Apr 2026 13:18:56 +0300 Subject: [PATCH 070/139] refactor: robust git-describe parsing for pre-release tags Switch to --long flag so format is always TAG-N-gSHA[-dirty], then split from the right. Handles pre-release tags (v0.14.0-rc1) that broke the previous left-split approach. Remove ineffective directory watch on .git/refs/tags/. Trim comments. --- build.rs | 69 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/build.rs b/build.rs index e3375af..463100c 100644 --- a/build.rs +++ b/build.rs @@ -1,47 +1,48 @@ fn main() { + // --long forces "TAG-N-gSHA[-dirty]" format even on exact tag matches, + // making parsing unambiguous for pre-release tags like v0.14.0-rc1. let git_version = std::process::Command::new("git") - .args(["describe", "--tags", "--always", "--dirty"]) + .args(["describe", "--tags", "--always", "--dirty", "--long"]) .output() .ok() .filter(|o| o.status.success()) .and_then(|o| String::from_utf8(o.stdout).ok()) - .map(|s| { - let s = s.trim(); - let s = s.strip_prefix('v').unwrap_or(s); - // "0.13.1" → clean tag → "0.13.1" - // "0.13.1-9-ga87f907" → ahead → "0.13.1+a87f907" - // "0.13.1-9-ga87f907-dirty" → dirty → "0.13.1+a87f907-dirty" - // "a87f907" → no tags → "0.0.0+a87f907" - // "a87f907-dirty" → no tags → "0.0.0+a87f907-dirty" - if let Some((base, rest)) = s.split_once("-") { - // Could be "0.13.1-9-ga87f907[-dirty]" or "a87f907-dirty" - if base.contains('.') { - // Tagged: extract sha from "-N-gSHA[-dirty]" - let parts: Vec<&str> = rest.splitn(3, '-').collect(); - match parts.as_slice() { - [_n, sha] => format!("{}+{}", base, sha.strip_prefix('g').unwrap_or(sha)), - [_n, sha, "dirty"] => { - format!("{}+{}-dirty", base, sha.strip_prefix('g').unwrap_or(sha)) - } - _ => s.to_string(), - } - } else { - // Untagged: "sha-dirty" - format!("0.0.0+{}", s) - } - } else if s.contains('.') { - // Exact tag match: "0.13.1" - s.to_string() - } else { - // Bare sha, no tags at all - format!("0.0.0+{}", s) - } - }); + .and_then(|raw| parse_git_describe(raw.trim())); if let Some(v) = git_version { println!("cargo:rustc-env=NUMA_BUILD_VERSION={}", v); } println!("cargo:rerun-if-changed=.git/HEAD"); - println!("cargo:rerun-if-changed=.git/refs/tags/"); +} + +/// Parse `git describe --long` output into a SemVer-compatible string. +/// "v0.13.1-0-ga87f907" → "0.13.1" +/// "v0.13.1-9-ga87f907" → "0.13.1+a87f907" +/// "v0.14.0-rc1-0-ga87f907" → "0.14.0-rc1" +/// "v0.14.0-rc1-3-ga87f907-dirty" → "0.14.0-rc1+a87f907-dirty" +/// "a87f907" → "0.0.0+a87f907" +fn parse_git_describe(s: &str) -> Option { + let s = s.strip_prefix('v').unwrap_or(s); + let dirty = s.ends_with("-dirty"); + let s = s.strip_suffix("-dirty").unwrap_or(s); + + // --long format: TAG-N-gSHA. Split from the right so tags with hyphens work. + let gpos = s.rfind("-g")?; + let sha = &s[gpos + 2..]; + let rest = &s[..gpos]; + let npos = rest.rfind('-')?; + let n: u32 = rest[npos + 1..].parse().ok()?; + let tag = &rest[..npos]; + + if tag.is_empty() { + return Some(format!("0.0.0+{}", sha)); + } + + Some(match (n, dirty) { + (0, false) => tag.to_string(), + (0, true) => format!("{}+{}-dirty", tag, sha), + (_, false) => format!("{}+{}", tag, sha), + (_, true) => format!("{}+{}-dirty", tag, sha), + }) } -- 2.34.1 From b69cc89d385f80ae86d5dabcb1fd9fd5cb554520 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 16 Apr 2026 15:12:00 +0300 Subject: [PATCH 071/139] fix(dashboard): skip allowlist re-render while input has focus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The polling refresh replaced the entire allowlist panel innerHTML every 2 seconds, destroying the input field mid-typing. Users had to paste-and-enter faster than the refresh interval — #106 reported this as text "timing out and erasing." Guard: skip renderAllowlist() when allowDomainInput has focus. --- site/dashboard.html | 1 + 1 file changed, 1 insertion(+) diff --git a/site/dashboard.html b/site/dashboard.html index 85b6984..d3b1820 100644 --- a/site/dashboard.html +++ b/site/dashboard.html @@ -1354,6 +1354,7 @@ function renderBlockingInfo(info) { } function renderAllowlist(entries) { + if (document.activeElement && document.activeElement.id === 'allowDomainInput') return; const el = document.getElementById('blockingAllowlist'); const count = entries.length; el.innerHTML = ` -- 2.34.1 From d3eab73a31b2065b713bcb47dfb3d08a9fbcb451 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 16 Apr 2026 16:13:15 +0300 Subject: [PATCH 072/139] fix: use sort_by_key to satisfy clippy unnecessary_sort_by --- src/system_dns.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/system_dns.rs b/src/system_dns.rs index 826101d..ca587b8 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -211,7 +211,7 @@ fn discover_macos() -> SystemDnsInfo { } // Sort longest suffix first for most-specific matching - rules.sort_by(|a, b| b.suffix.len().cmp(&a.suffix.len())); + rules.sort_by_key(|r| std::cmp::Reverse(r.suffix.len())); for rule in &rules { info!( -- 2.34.1 From 65e65028a063521b05801b6c9aec34cdd3b325b8 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 16 Apr 2026 16:59:54 +0300 Subject: [PATCH 073/139] fix(windows): separate service lifecycle from install flow service start/stop/restart/status now map to proper SCM operations instead of re-running the full install/uninstall flow. On re-install, stop the running service first so the binary can be overwritten. --- src/main.rs | 9 ++-- src/system_dns.rs | 113 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 117 insertions(+), 5 deletions(-) diff --git a/src/main.rs b/src/main.rs index 88f2128..b8893b3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,7 @@ -use numa::system_dns::{install_service, restart_service, service_status, uninstall_service}; +use numa::system_dns::{ + install_service, restart_service, service_status, start_service, stop_service, + uninstall_service, +}; fn main() -> numa::Result<()> { env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")) @@ -28,8 +31,8 @@ fn main() -> numa::Result<()> { let sub = std::env::args().nth(2).unwrap_or_default(); eprintln!("\x1b[1;38;2;192;98;58mNuma\x1b[0m — service management\n"); return match sub.as_str() { - "start" => install_service().map_err(|e| e.into()), - "stop" => uninstall_service().map_err(|e| e.into()), + "start" => start_service().map_err(|e| e.into()), + "stop" => stop_service().map_err(|e| e.into()), "restart" => restart_service().map_err(|e| e.into()), "status" => service_status().map_err(|e| e.into()), _ => { diff --git a/src/system_dns.rs b/src/system_dns.rs index ca587b8..c4279cd 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -698,6 +698,13 @@ fn install_windows() -> Result<(), String> { let needs_reboot = disable_dnscache()?; + // On re-install, stop the running service first so the binary can be + // overwritten (SCM holds a handle to the exe while it's running). + let reinstall = is_service_registered(); + if reinstall { + stop_service_scm(); + } + // Copy the binary to a stable path under ProgramData and register it // as a real Windows service (SCM-managed, boot-time, auto-restart). let service_exe = install_service_binary()?; @@ -864,6 +871,41 @@ fn delete_service_scm() { } } +/// Check whether the service is registered with SCM (regardless of state). +#[cfg(windows)] +fn is_service_registered() -> bool { + run_sc(&["query", crate::windows_service::SERVICE_NAME]) + .map(|o| { + // sc query exits 0 if the service exists (running or stopped). + // Error 1060 = "service does not exist". + if o.status.success() { + return true; + } + let text = String::from_utf8_lossy(&o.stdout); + !text.contains("1060") + }) + .unwrap_or(false) +} + +/// Print service state from SCM. +#[cfg(windows)] +fn service_status_windows() -> Result<(), String> { + let out = run_sc(&["query", crate::windows_service::SERVICE_NAME])?; + let text = String::from_utf8_lossy(&out.stdout); + if text.contains("1060") { + eprintln!(" Service is not installed.\n"); + return Ok(()); + } + // Parse STATE line, e.g. "STATE : 4 RUNNING" + let state = text + .lines() + .find(|l| l.contains("STATE")) + .map(|l| l.trim().to_string()) + .unwrap_or_else(|| "unknown".to_string()); + eprintln!(" {}\n", state); + Ok(()) +} + #[cfg(windows)] fn uninstall_windows() -> Result<(), String> { // Stop + remove the service before touching DNS, so port 53 is released @@ -1167,6 +1209,62 @@ pub fn install_service() -> Result<(), String> { result } +/// Start the service. If already installed, just starts it via the platform +/// service manager. If not installed, falls through to a full install. +pub fn start_service() -> Result<(), String> { + #[cfg(target_os = "macos")] + { + install_service() + } + #[cfg(target_os = "linux")] + { + install_service() + } + #[cfg(windows)] + { + if is_service_registered() { + start_service_scm()?; + eprintln!(" Service started.\n"); + Ok(()) + } else { + install_service() + } + } + #[cfg(not(any(target_os = "macos", target_os = "linux", windows)))] + { + Err("service start not supported on this OS".to_string()) + } +} + +/// Stop the service without uninstalling it. +pub fn stop_service() -> Result<(), String> { + #[cfg(target_os = "macos")] + { + uninstall_service() + } + #[cfg(target_os = "linux")] + { + uninstall_service() + } + #[cfg(windows)] + { + let out = run_sc(&["stop", crate::windows_service::SERVICE_NAME])?; + if !out.status.success() { + let text = String::from_utf8_lossy(&out.stdout); + // 1062 = not started, 1060 = does not exist + if !text.contains("1062") && !text.contains("1060") { + return Err(format!("sc stop failed: {}", text.trim())); + } + } + eprintln!(" Service stopped.\n"); + Ok(()) + } + #[cfg(not(any(target_os = "macos", target_os = "linux", windows)))] + { + Err("service stop not supported on this OS".to_string()) + } +} + /// Uninstall the Numa system service. pub fn uninstall_service() -> Result<(), String> { let _ = untrust_ca(); @@ -1236,7 +1334,14 @@ pub fn restart_service() -> Result<(), String> { eprintln!(" Service restarted → {}\n", version); Ok(()) } - #[cfg(not(any(target_os = "macos", target_os = "linux")))] + #[cfg(windows)] + { + stop_service_scm(); + start_service_scm()?; + eprintln!(" Service restarted.\n"); + Ok(()) + } + #[cfg(not(any(target_os = "macos", target_os = "linux", windows)))] { Err("service restart not supported on this OS".to_string()) } @@ -1252,7 +1357,11 @@ pub fn service_status() -> Result<(), String> { { service_status_linux() } - #[cfg(not(any(target_os = "macos", target_os = "linux")))] + #[cfg(windows)] + { + service_status_windows() + } + #[cfg(not(any(target_os = "macos", target_os = "linux", windows)))] { Err("service status not supported on this OS".to_string()) } -- 2.34.1 From da40a8dbfccd06e4ff49aab1ee5656659b511aec Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 16 Apr 2026 18:08:48 +0300 Subject: [PATCH 074/139] ci: fetch full history on Windows so build.rs embeds git SHA --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0ad7e45..33e25a4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -56,6 +56,8 @@ jobs: runs-on: windows-latest steps: - uses: actions/checkout@v6 + with: + fetch-depth: 0 - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - name: build -- 2.34.1 From 6789c321bc6938c7c5b0254720a5e548498e1243 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 16 Apr 2026 18:35:09 +0300 Subject: [PATCH 075/139] fix(windows): defer DNS redirect until port 53 is free Probe port 53 after disabling Dnscache instead of assuming reboot is needed. Skip DNS redirect when port is blocked (service does it on first boot). Fix readiness probe: TCP connect to API port instead of broken UDP send_to that always succeeded. --- src/system_dns.rs | 89 +++++++++++++++++++++++++++--------------- src/windows_service.rs | 17 ++++++++ 2 files changed, 75 insertions(+), 31 deletions(-) diff --git a/src/system_dns.rs b/src/system_dns.rs index c4279cd..35490ae 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -572,7 +572,7 @@ fn windows_backup_path() -> std::path::PathBuf { #[cfg(windows)] fn disable_dnscache() -> Result { - // Check if Dnscache is running (it holds port 53 at kernel level) + // Check if Dnscache is running (it can hold port 53) let output = std::process::Command::new("sc") .args(["query", "Dnscache"]) .output() @@ -603,8 +603,16 @@ fn disable_dnscache() -> Result { return Err("failed to disable Dnscache via registry (run as Administrator?)".into()); } - eprintln!(" Dnscache disabled. A reboot is required to free port 53."); - Ok(true) + // Dnscache is disabled for next boot. Check whether port 53 is + // actually blocked right now — on many Windows configurations + // Dnscache doesn't bind port 53 even while running. + let port_blocked = std::net::UdpSocket::bind("127.0.0.1:53").is_err(); + if port_blocked { + eprintln!(" Dnscache disabled. A reboot is required to free port 53."); + } else { + eprintln!(" Dnscache disabled. Port 53 is free."); + } + Ok(port_blocked) } #[cfg(windows)] @@ -671,31 +679,6 @@ fn install_windows() -> Result<(), String> { std::fs::write(&path, json).map_err(|e| format!("failed to write backup: {}", e))?; } - for name in interfaces.keys() { - let status = std::process::Command::new("netsh") - .args([ - "interface", - "ipv4", - "set", - "dnsservers", - name, - "static", - "127.0.0.1", - "primary", - ]) - .status() - .map_err(|e| format!("failed to set DNS for {}: {}", name, e))?; - - if status.success() { - eprintln!(" set DNS for \"{}\" -> 127.0.0.1", name); - } else { - eprintln!( - " warning: failed to set DNS for \"{}\" (run as Administrator?)", - name - ); - } - } - let needs_reboot = disable_dnscache()?; // On re-install, stop the running service first so the binary can be @@ -710,9 +693,14 @@ fn install_windows() -> Result<(), String> { let service_exe = install_service_binary()?; register_service_scm(&service_exe)?; - // If no reboot is pending (Dnscache wasn't running, port 53 free), - // start the service immediately. Otherwise it'll launch on next boot. - if !needs_reboot { + if needs_reboot { + // Dnscache still holds port 53 until reboot. Do NOT redirect DNS + // yet — nothing is listening on 127.0.0.1:53, so redirecting now + // would kill DNS. The service will call redirect_dns_to_localhost() + // on its first startup after reboot. + } else { + redirect_dns_with_interfaces(&interfaces)?; + match start_service_scm() { Ok(_) => eprintln!(" Service started."), Err(e) => eprintln!( @@ -756,6 +744,45 @@ fn run_sc(args: &[&str]) -> Result { Ok(out) } +/// Point all active network interfaces at 127.0.0.1 so Numa handles DNS. +/// Called from the service on first boot after a reboot that freed Dnscache. +#[cfg(windows)] +pub fn redirect_dns_to_localhost() -> Result<(), String> { + let interfaces = get_windows_interfaces()?; + redirect_dns_with_interfaces(&interfaces) +} + +#[cfg(windows)] +fn redirect_dns_with_interfaces( + interfaces: &std::collections::HashMap, +) -> Result<(), String> { + for name in interfaces.keys() { + let status = std::process::Command::new("netsh") + .args([ + "interface", + "ipv4", + "set", + "dnsservers", + name, + "static", + "127.0.0.1", + "primary", + ]) + .status() + .map_err(|e| format!("failed to set DNS for {}: {}", name, e))?; + + if status.success() { + eprintln!(" set DNS for \"{}\" -> 127.0.0.1", name); + } else { + eprintln!( + " warning: failed to set DNS for \"{}\" (run as Administrator?)", + name + ); + } + } + Ok(()) +} + /// Copy the currently-running binary to the service install location. SCM /// keeps a handle to this path, so it must be stable across user sessions. #[cfg(windows)] diff --git a/src/windows_service.rs b/src/windows_service.rs index a1403d7..a363359 100644 --- a/src/windows_service.rs +++ b/src/windows_service.rs @@ -83,6 +83,23 @@ fn run_service() -> windows_service::Result<()> { let _ = server_done_tx.send(()); }); + // Wait for the API to be ready, then ensure DNS points at localhost. + // On first boot after install (Dnscache was disabled, reboot freed + // port 53), the installer deferred the DNS redirect — do it now. + let api_up = (0..20).any(|i| { + if i > 0 { + std::thread::sleep(Duration::from_millis(500)); + } + std::net::TcpStream::connect(("127.0.0.1", crate::config::DEFAULT_API_PORT)).is_ok() + }); + if api_up { + if let Err(e) = crate::system_dns::redirect_dns_to_localhost() { + log::warn!("could not redirect DNS to localhost: {}", e); + } + } else { + log::error!("numa API did not start within 10s — DNS not redirected"); + } + // Wait for either SCM stop or server termination. loop { if shutdown_rx.recv_timeout(Duration::from_millis(500)).is_ok() { -- 2.34.1 From f0a1dd7106b632957e9a5cfef5e16910ce599362 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 16 Apr 2026 19:01:34 +0300 Subject: [PATCH 076/139] fix(dashboard): hide logs path on Windows (no log sink yet) --- site/dashboard.html | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/site/dashboard.html b/site/dashboard.html index d3b1820..0e26752 100644 --- a/site/dashboard.html +++ b/site/dashboard.html @@ -1150,10 +1150,16 @@ async function refresh() { document.getElementById('footerSrtt').textContent = stats.srtt ? 'on' : 'off'; document.getElementById('footerSrtt').style.color = stats.srtt ? 'var(--emerald)' : 'var(--text-dim)'; if (!document.getElementById('footerLogs').textContent) { + const isWin = stats.data_dir && stats.data_dir.includes(':\\'); const isMac = stats.data_dir && stats.data_dir.includes('/usr/local/'); - document.getElementById('footerLogs').textContent = isMac - ? '/usr/local/var/log/numa.log' - : 'journalctl -u numa -f'; + const logsEl = document.getElementById('footerLogs'); + if (isWin) { + document.getElementById('footerLogsWrap').style.display = 'none'; + } else { + logsEl.textContent = isMac + ? '/usr/local/var/log/numa.log' + : 'journalctl -u numa -f'; + } } // LAN status indicator @@ -1517,7 +1523,7 @@ setInterval(refresh, 2000);
      Config: · Data: - · Logs: + · Logs:
      Upstream: · DNSSEC: -- 2.34.1 From 9bea038cb607c044b185979ddb9260d3a72bd9f0 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 16 Apr 2026 19:12:42 +0300 Subject: [PATCH 077/139] fix(windows): unify config/data dir and add service log file config_dir() on Windows now returns data_dir() (ProgramData) so config, services.json, and log file are in the same place for both interactive and service contexts. Service mode writes logs to numa.log via env_logger pipe. Dashboard shows correct log path per OS. --- site/dashboard.html | 13 +++++-------- src/lib.rs | 7 ++----- src/main.rs | 31 +++++++++++++++++++++---------- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/site/dashboard.html b/site/dashboard.html index 0e26752..fa2d965 100644 --- a/site/dashboard.html +++ b/site/dashboard.html @@ -1153,13 +1153,10 @@ async function refresh() { const isWin = stats.data_dir && stats.data_dir.includes(':\\'); const isMac = stats.data_dir && stats.data_dir.includes('/usr/local/'); const logsEl = document.getElementById('footerLogs'); - if (isWin) { - document.getElementById('footerLogsWrap').style.display = 'none'; - } else { - logsEl.textContent = isMac - ? '/usr/local/var/log/numa.log' - : 'journalctl -u numa -f'; - } + logsEl.textContent = isWin + ? stats.data_dir + '\\numa.log' + : isMac ? '/usr/local/var/log/numa.log' + : 'journalctl -u numa -f'; } // LAN status indicator @@ -1523,7 +1520,7 @@ setInterval(refresh, 2000);
      Config: · Data: - · Logs: + · Logs:
      Upstream: · DNSSEC: diff --git a/src/lib.rs b/src/lib.rs index 8bb28d6..a16568b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -101,14 +101,11 @@ where /// Linux root daemon: /var/lib/numa (FHS) — falls back to /usr/local/var/numa /// if a pre-v0.10.1 install already lives there. /// macOS root daemon: /usr/local/var/numa (Homebrew prefix) -/// Windows: %APPDATA%\numa +/// Windows: %PROGRAMDATA%\numa (same as data_dir — no per-user config on Windows) pub fn config_dir() -> std::path::PathBuf { #[cfg(windows)] { - std::path::PathBuf::from( - std::env::var("APPDATA").unwrap_or_else(|_| "C:\\ProgramData".into()), - ) - .join("numa") + data_dir() } #[cfg(not(windows))] { diff --git a/src/main.rs b/src/main.rs index b8893b3..34bf747 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,21 +4,32 @@ use numa::system_dns::{ }; fn main() -> numa::Result<()> { + // Handle CLI subcommands + let arg1 = std::env::args().nth(1).unwrap_or_default(); + + #[cfg(windows)] + if arg1 == "--service" { + // Running under SCM — stderr goes nowhere. Redirect logs to a file. + let log_path = numa::data_dir().join("numa.log"); + let log_file = std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(&log_path) + .expect("failed to open log file"); + env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")) + .format_timestamp_millis() + .target(env_logger::Target::Pipe(Box::new(log_file))) + .init(); + numa::windows_service::run_as_service() + .map_err(|e| format!("windows service dispatcher failed: {}", e))?; + return Ok(()); + } + env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")) .format_timestamp_millis() .init(); - // Handle CLI subcommands - let arg1 = std::env::args().nth(1).unwrap_or_default(); match arg1.as_str() { - #[cfg(windows)] - "--service" => { - // Entry point used by Windows SCM (`sc create … binPath="numa.exe --service"`). - // Blocks until SCM sends Stop; never returns normally. - numa::windows_service::run_as_service() - .map_err(|e| format!("windows service dispatcher failed: {}", e))?; - return Ok(()); - } "install" => { eprintln!("\x1b[1;38;2;192;98;58mNuma\x1b[0m — installing\n"); return install_service().map_err(|e| e.into()); -- 2.34.1 From 9f08d8b4896bc2b0a2f72ca8bb18dd393ad9ef93 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 16 Apr 2026 19:21:56 +0300 Subject: [PATCH 078/139] fix(windows): stop service before port probe, wait for full exit Stop the running service before disabling Dnscache so the port 53 probe sees the real state (not Numa's own binding). Wait for SCM STOPPED state before copying the binary to avoid os error 32 (file in use). --- src/system_dns.rs | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/system_dns.rs b/src/system_dns.rs index 35490ae..7e2d16a 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -679,15 +679,15 @@ fn install_windows() -> Result<(), String> { std::fs::write(&path, json).map_err(|e| format!("failed to write backup: {}", e))?; } - let needs_reboot = disable_dnscache()?; - // On re-install, stop the running service first so the binary can be - // overwritten (SCM holds a handle to the exe while it's running). - let reinstall = is_service_registered(); - if reinstall { + // overwritten and port 53 is released for the Dnscache probe. + if is_service_registered() { + eprintln!(" Stopping existing service..."); stop_service_scm(); } + let needs_reboot = disable_dnscache()?; + // Copy the binary to a stable path under ProgramData and register it // as a real Windows service (SCM-managed, boot-time, auto-restart). let service_exe = install_service_binary()?; @@ -880,14 +880,24 @@ fn start_service_scm() -> Result<(), String> { Ok(()) } -/// Stop the service. Idempotent — already-stopped or missing service logs -/// a warning but doesn't error, since both callers (install re-run, -/// uninstall) want best-effort cleanup rather than hard failure. +/// Stop the service and wait for it to fully exit. Idempotent — +/// already-stopped or missing service is not an error. #[cfg(windows)] fn stop_service_scm() { - if let Err(e) = run_sc(&["stop", crate::windows_service::SERVICE_NAME]) { - log::warn!("sc stop failed: {}", e); + let name = crate::windows_service::SERVICE_NAME; + let _ = run_sc(&["stop", name]); + // Wait up to 10s for the service to reach STOPPED state so the + // binary file handle is released before we try to overwrite it. + for _ in 0..20 { + if let Ok(out) = run_sc(&["query", name]) { + let text = String::from_utf8_lossy(&out.stdout); + if text.contains("STOPPED") || text.contains("1060") { + return; + } + } + std::thread::sleep(std::time::Duration::from_millis(500)); } + eprintln!(" warning: service did not stop within 10s"); } /// Remove the service from SCM. Idempotent — see `stop_service_scm`. -- 2.34.1 From fe9f31616e574b9c3c4ae97b0b646de6e65705ce Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 16 Apr 2026 19:31:26 +0300 Subject: [PATCH 079/139] test: add SCM output parsing and config path regression tests Extract parse_sc_registered and parse_sc_state as testable pure functions. 8 new tests covering: service registration detection, service state parsing, and Windows config_dir == data_dir invariant. --- src/system_dns.rs | 95 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 78 insertions(+), 17 deletions(-) diff --git a/src/system_dns.rs b/src/system_dns.rs index 7e2d16a..941c053 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -912,35 +912,43 @@ fn delete_service_scm() { #[cfg(windows)] fn is_service_registered() -> bool { run_sc(&["query", crate::windows_service::SERVICE_NAME]) - .map(|o| { - // sc query exits 0 if the service exists (running or stopped). - // Error 1060 = "service does not exist". - if o.status.success() { - return true; - } - let text = String::from_utf8_lossy(&o.stdout); - !text.contains("1060") - }) + .map(|o| parse_sc_registered(o.status.success(), &String::from_utf8_lossy(&o.stdout))) .unwrap_or(false) } +/// Parse `sc query` output to determine if a service is registered. +/// Extracted for testability — the actual `sc` call is in `is_service_registered`. +#[cfg(any(windows, test))] +fn parse_sc_registered(exit_success: bool, stdout: &str) -> bool { + if exit_success { + return true; + } + // Error 1060 = "The specified service does not exist as an installed service." + !stdout.contains("1060") +} + /// Print service state from SCM. #[cfg(windows)] fn service_status_windows() -> Result<(), String> { let out = run_sc(&["query", crate::windows_service::SERVICE_NAME])?; let text = String::from_utf8_lossy(&out.stdout); - if text.contains("1060") { - eprintln!(" Service is not installed.\n"); - return Ok(()); + let display = parse_sc_state(&text); + eprintln!(" {}\n", display); + Ok(()) +} + +/// Parse the STATE line from `sc query` output. Returns a human-readable +/// string like "STATE : 4 RUNNING" or "Service is not installed." +#[cfg(any(windows, test))] +fn parse_sc_state(sc_output: &str) -> String { + if sc_output.contains("1060") { + return "Service is not installed.".to_string(); } - // Parse STATE line, e.g. "STATE : 4 RUNNING" - let state = text + sc_output .lines() .find(|l| l.contains("STATE")) .map(|l| l.trim().to_string()) - .unwrap_or_else(|| "unknown".to_string()); - eprintln!(" {}\n", state); - Ok(()) + .unwrap_or_else(|| "unknown".to_string()) } #[cfg(windows)] @@ -2132,4 +2140,57 @@ Wireless LAN adapter Wi-Fi: let err = std::io::Error::from(std::io::ErrorKind::AddrInUse); assert!(try_port53_advisory("not-an-address", &err).is_none()); } + + #[test] + fn sc_query_running_service_is_registered() { + assert!(parse_sc_registered(true, "")); + } + + #[test] + fn sc_query_stopped_service_is_registered() { + let output = "SERVICE_NAME: Numa\n TYPE: 10 WIN32_OWN\n STATE: 1 STOPPED\n"; + assert!(parse_sc_registered(true, output)); + } + + #[test] + fn sc_query_missing_service_not_registered() { + let output = "[SC] EnumQueryServicesStatus:OpenService FAILED 1060:\n\nThe specified service does not exist as an installed service.\n"; + assert!(!parse_sc_registered(false, output)); + } + + #[test] + fn sc_query_other_error_assumes_registered() { + // Permission denied or other errors — don't assume unregistered. + let output = "[SC] OpenService FAILED 5:\n\nAccess is denied.\n"; + assert!(parse_sc_registered(false, output)); + } + + #[test] + fn parse_sc_state_running() { + let output = "SERVICE_NAME: Numa\n TYPE : 10 WIN32_OWN_PROCESS\n STATE : 4 RUNNING\n WIN32_EXIT_CODE : 0\n"; + assert!(parse_sc_state(output).contains("RUNNING")); + } + + #[test] + fn parse_sc_state_stopped() { + let output = "SERVICE_NAME: Numa\n TYPE : 10 WIN32_OWN_PROCESS\n STATE : 1 STOPPED\n"; + assert!(parse_sc_state(output).contains("STOPPED")); + } + + #[test] + fn parse_sc_state_not_installed() { + let output = "[SC] EnumQueryServicesStatus:OpenService FAILED 1060:\n\n"; + assert_eq!(parse_sc_state(output), "Service is not installed."); + } + + #[test] + fn parse_sc_state_empty_output() { + assert_eq!(parse_sc_state(""), "unknown"); + } + + #[cfg(windows)] + #[test] + fn windows_config_dir_equals_data_dir() { + assert_eq!(crate::config_dir(), crate::data_dir()); + } } -- 2.34.1 From 9e56054f37d4c2e1e03b6468effbfb695a0e3c8a Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 16 Apr 2026 19:56:44 +0300 Subject: [PATCH 080/139] ci: add integration tests for install/uninstall lifecycle Release-build + install/verify/re-install/uninstall cycle on Linux and macOS. Runs after lint/test passes (needs dependency). Cleanup step uses if: always() to handle cancellation. --- .github/workflows/ci.yml | 50 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 33e25a4..4b4972e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,3 +71,53 @@ jobs: with: name: numa-windows-x86_64 path: target/debug/numa.exe + + integration-linux: + needs: [check] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + - name: build + run: cargo build --release + - name: install / verify / re-install / uninstall + run: | + sudo ./target/release/numa install + sleep 2 + curl -sf http://127.0.0.1:5380/health + dig @127.0.0.1 example.com +short +timeout=5 | grep -q '.' + sudo ./target/release/numa install + sleep 2 + curl -sf http://127.0.0.1:5380/health + sudo ./target/release/numa uninstall + sleep 1 + ! curl -sf http://127.0.0.1:5380/health 2>/dev/null + - name: cleanup + if: always() + run: sudo ./target/release/numa uninstall 2>/dev/null || true + + integration-macos: + needs: [check-macos] + runs-on: macos-latest + steps: + - uses: actions/checkout@v6 + - uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + - name: build + run: cargo build --release + - name: install / verify / re-install / uninstall + run: | + sudo ./target/release/numa install + sleep 2 + curl -sf http://127.0.0.1:5380/health + dig @127.0.0.1 example.com +short +timeout=5 | grep -q '.' + sudo ./target/release/numa install + sleep 2 + curl -sf http://127.0.0.1:5380/health + sudo ./target/release/numa uninstall + sleep 1 + ! curl -sf http://127.0.0.1:5380/health 2>/dev/null + - name: cleanup + if: always() + run: sudo ./target/release/numa uninstall 2>/dev/null || true -- 2.34.1 From 99af97a67bc32ff478c7e44a9c99b825d6b374a5 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 16 Apr 2026 20:20:53 +0300 Subject: [PATCH 081/139] ci: wait for DNS recovery after uninstall on Linux systemd-resolved needs a moment to restore its stub listener after the numa drop-in is removed. Without a wait, the runner can't resolve GitHub's API to report job completion. --- .github/workflows/ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4b4972e..502279d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -93,6 +93,10 @@ jobs: sudo ./target/release/numa uninstall sleep 1 ! curl -sf http://127.0.0.1:5380/health 2>/dev/null + # Wait for systemd-resolved to restore DNS so the runner can + # phone home to GitHub after the job completes. + sleep 3 + dig @127.0.0.1 github.com +short +timeout=5 || dig github.com +short +timeout=5 || true - name: cleanup if: always() run: sudo ./target/release/numa uninstall 2>/dev/null || true -- 2.34.1 From 34b75833b8da63e39b9df83efc069f5008b6e41d Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Fri, 17 Apr 2026 01:11:20 +0300 Subject: [PATCH 082/139] ci: poll for DNS recovery in cleanup, not test step Move DNS recovery wait into the cleanup step (if: always) so it runs regardless of test outcome. Use getent hosts loop instead of sleep+dig to match what post-steps actually use for resolution. --- .github/workflows/ci.yml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 502279d..f29c51a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -93,13 +93,16 @@ jobs: sudo ./target/release/numa uninstall sleep 1 ! curl -sf http://127.0.0.1:5380/health 2>/dev/null - # Wait for systemd-resolved to restore DNS so the runner can - # phone home to GitHub after the job completes. - sleep 3 - dig @127.0.0.1 github.com +short +timeout=5 || dig github.com +short +timeout=5 || true - name: cleanup if: always() - run: sudo ./target/release/numa uninstall 2>/dev/null || true + run: | + sudo ./target/release/numa uninstall 2>/dev/null || true + # Wait for systemd-resolved to fully restore DNS so post-job + # steps (rust-cache upload, log shipping) can reach GitHub. + for i in $(seq 1 30); do + if getent hosts github.com >/dev/null 2>&1; then break; fi + sleep 1 + done integration-macos: needs: [check-macos] -- 2.34.1 From 1d9495c013a9ee6b2fd1a79e3aad6c71369d95c7 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Fri, 17 Apr 2026 01:32:36 +0300 Subject: [PATCH 083/139] ci: bridge DNS gap with direct upstream instead of polling systemd-resolved has a ~40s reconfiguration stall after restart (systemd #22521) that breaks the GHA runner's persistent connection to results-receiver.actions.githubusercontent.com. Polling for DNS recovery isn't enough since the .NET runner agent caches DNS at the connection-pool level. Replace the broken stub-resolv symlink with a direct upstream so DNS works instantly. --- .github/workflows/ci.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f29c51a..e116744 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -97,12 +97,14 @@ jobs: if: always() run: | sudo ./target/release/numa uninstall 2>/dev/null || true - # Wait for systemd-resolved to fully restore DNS so post-job - # steps (rust-cache upload, log shipping) can reach GitHub. - for i in $(seq 1 30); do - if getent hosts github.com >/dev/null 2>&1; then break; fi - sleep 1 - done + # systemd-resolved has a ~40s DNS reconfiguration stall after + # restart (systemd issue #22521) that breaks the runner agent's + # connection to GitHub. Bridge it by replacing the stub-resolv + # symlink with a direct upstream — DNS works instantly and the + # runner can phone home for post-job steps. + sudo rm -f /etc/resolv.conf + echo "nameserver 8.8.8.8" | sudo tee /etc/resolv.conf > /dev/null + getent hosts github.com >/dev/null integration-macos: needs: [check-macos] -- 2.34.1 From 5f77af55e9595110b4d3da39084b5d5578af77d8 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Fri, 17 Apr 2026 03:39:21 +0300 Subject: [PATCH 084/139] fix(forward): track SRTT for DoT upstreams, not just UDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SRTT ordering + failure penalty path was UDP-only, so a DoT primary in a forwarding-rule pool was never deprioritized on failure and all DoT entries tied at INITIAL_SRTT_MS in the sort key. With [[forwarding]] now accepting arrays of upstreams, DoT pools are a first-class case and need the same healthiest-first behavior the default pool gets for UDP. - Add Upstream::tracked_ip() → Some(ip) for Udp/Dot, None for Doh (DoH has no stable IP — reqwest pools connections by hostname). - Rewire the three SRTT call sites in forward_with_failover_raw. - Hoist srtt.read() out of the candidate-scoring loop — one lock per query instead of N (matters now that pools commonly have N>1). - Drop unused #[derive(Debug)] on UpstreamPool and ForwardingRule. - Regression tests: udp_failure_records_in_srtt + dot_failure_records_in_srtt. --- src/forward.rs | 103 ++++++++++++++++++++++++++++++++++++++-------- src/system_dns.rs | 2 +- 2 files changed, 87 insertions(+), 18 deletions(-) diff --git a/src/forward.rs b/src/forward.rs index 8bb548e..9bfa426 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -25,6 +25,18 @@ pub enum Upstream { }, } +impl Upstream { + /// IP address to key SRTT tracking on, if the upstream has a stable one. + /// `Doh` routes through a URL + connection pool, so there's no single IP + /// to track; SRTT is skipped for it. + pub fn tracked_ip(&self) -> Option { + match self { + Upstream::Udp(addr) | Upstream::Dot { addr, .. } => Some(addr.ip()), + Upstream::Doh { .. } => None, + } + } +} + impl PartialEq for Upstream { fn eq(&self, other: &Self) -> bool { match (self, other) { @@ -118,7 +130,7 @@ fn build_dot_connector() -> Result { ))) } -#[derive(Clone, Debug)] +#[derive(Clone)] pub struct UpstreamPool { primary: Vec, fallback: Vec, @@ -345,18 +357,17 @@ pub async fn forward_with_failover_raw( timeout_duration: Duration, hedge_delay: Duration, ) -> Result> { - let mut candidates: Vec<(usize, u64)> = pool - .primary - .iter() - .enumerate() - .map(|(i, u)| { - let rtt = match u { - Upstream::Udp(addr) => srtt.read().unwrap().get(addr.ip()), - _ => 0, - }; - (i, rtt) - }) - .collect(); + let mut candidates: Vec<(usize, u64)> = { + let srtt_read = srtt.read().unwrap(); + pool.primary + .iter() + .enumerate() + .map(|(i, u)| { + let rtt = u.tracked_ip().map(|ip| srtt_read.get(ip)).unwrap_or(0); + (i, rtt) + }) + .collect() + }; candidates.sort_by_key(|&(_, rtt)| rtt); let all_upstreams: Vec<&Upstream> = candidates @@ -380,15 +391,15 @@ pub async fn forward_with_failover_raw( }; match result { Ok(resp) => { - if let Upstream::Udp(addr) = upstream { + if let Some(ip) = upstream.tracked_ip() { let rtt_ms = start.elapsed().as_millis() as u64; - srtt.write().unwrap().record_rtt(addr.ip(), rtt_ms, false); + srtt.write().unwrap().record_rtt(ip, rtt_ms, false); } return Ok(resp); } Err(e) => { - if let Upstream::Udp(addr) = upstream { - srtt.write().unwrap().record_failure(addr.ip()); + if let Some(ip) = upstream.tracked_ip() { + srtt.write().unwrap().record_failure(ip); } log::debug!("upstream {} failed: {}", upstream, e); last_err = Some(e); @@ -707,4 +718,62 @@ mod tests { assert!(!pool.maybe_update_primary("not-an-ip", 53)); assert_eq!(pool.preferred().unwrap().to_string(), "1.2.3.4:53"); } + + fn tcp_closed_port() -> SocketAddr { + // Bind a TCP listener, grab the port, drop → kernel returns RST on connect. + let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap(); + let addr = listener.local_addr().unwrap(); + drop(listener); + addr + } + + #[tokio::test] + async fn udp_failure_records_in_srtt() { + let blackhole = crate::testutil::blackhole_upstream(); + let pool = UpstreamPool::new(vec![Upstream::Udp(blackhole)], vec![]); + let srtt = RwLock::new(SrttCache::new(true)); + let _ = forward_with_failover_raw( + &[0u8; 12], + &pool, + &srtt, + Duration::from_millis(100), + Duration::ZERO, + ) + .await; + assert!(srtt.read().unwrap().is_known(blackhole.ip())); + } + + #[tokio::test] + async fn dot_failure_records_in_srtt() { + let dead1 = tcp_closed_port(); + let dead2 = tcp_closed_port(); + let connector = build_dot_connector().unwrap(); + let pool = UpstreamPool::new( + vec![ + Upstream::Dot { + addr: dead1, + tls_name: Some("dns.quad9.net".to_string()), + connector: connector.clone(), + }, + Upstream::Dot { + addr: dead2, + tls_name: Some("dns.quad9.net".to_string()), + connector, + }, + ], + vec![], + ); + let srtt = RwLock::new(SrttCache::new(true)); + let _ = forward_with_failover_raw( + &[0u8; 12], + &pool, + &srtt, + Duration::from_millis(500), + Duration::ZERO, + ) + .await; + let cache = srtt.read().unwrap(); + assert!(cache.is_known(dead1.ip())); + assert!(cache.is_known(dead2.ip())); + } } diff --git a/src/system_dns.rs b/src/system_dns.rs index 7f6304b..b70b9d9 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -22,7 +22,7 @@ fn is_loopback_or_stub(addr: &str) -> bool { } /// A conditional forwarding rule: domains matching `suffix` are forwarded to `upstream`. -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct ForwardingRule { pub suffix: String, dot_suffix: String, // pre-computed ".suffix" for zero-alloc matching -- 2.34.1 From 695a8b963c045fb5f7147b24a8610e3e5cac694f Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 18 Apr 2026 07:56:59 +0300 Subject: [PATCH 085/139] feat(linux): run systemd service as unprivileged numa user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - numa.service: User=numa + CAP_NET_BIND_SERVICE + sandboxing block (ProtectSystem=strict, PrivateTmp, seccomp @system-service, etc) - install_service_linux: create numa system user + chown data_dir before first start so TLS-cert generation and state writes land on a numa-owned tree Runtime verified root-free on Linux — network_watch_loop only reads /etc/resolv.conf; all system-DNS mutation stays in the installer, which continues to run as root via sudo. --- numa.service | 34 +++++++++++++++++++++++++++ src/system_dns.rs | 60 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/numa.service b/numa.service index 7e67296..6894078 100644 --- a/numa.service +++ b/numa.service @@ -8,6 +8,40 @@ Type=simple ExecStart={{exe_path}} Restart=always RestartSec=2 + +User=numa +Group=numa + +AmbientCapabilities=CAP_NET_BIND_SERVICE +CapabilityBoundingSet=CAP_NET_BIND_SERVICE + +# StateDirectory maps to crate::data_dir() default on Linux (/var/lib/numa). +# systemd auto-creates + chowns on every start, fixing legacy root-owned trees. +StateDirectory=numa +StateDirectoryMode=0750 +ConfigurationDirectory=numa +ConfigurationDirectoryMode=0755 + +# Sandboxing +NoNewPrivileges=true +ProtectSystem=strict +ProtectHome=true +PrivateTmp=true +PrivateDevices=true +ProtectKernelTunables=true +ProtectKernelModules=true +ProtectControlGroups=true +LockPersonality=true +MemoryDenyWriteExecute=true +RestrictNamespaces=true +RestrictRealtime=true +RestrictSUIDSGID=true +SystemCallArchitectures=native +SystemCallFilter=@system-service +SystemCallFilter=~@privileged @resources +# AF_NETLINK for interface enumeration on network changes +RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX AF_NETLINK + StandardOutput=journal StandardError=journal SyslogIdentifier=numa diff --git a/src/system_dns.rs b/src/system_dns.rs index b70b9d9..7b4de42 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -1664,8 +1664,68 @@ fn uninstall_linux() -> Result<(), String> { Ok(()) } +#[cfg(target_os = "linux")] +const NUMA_USER: &str = "numa"; + +#[cfg(target_os = "linux")] +fn ensure_numa_user_linux() -> Result<(), String> { + let _ = std::process::Command::new("groupadd") + .args(["-f", "-r", NUMA_USER]) + .status(); + + let data_dir = crate::data_dir(); + let status = std::process::Command::new("useradd") + .args([ + "-r", + "-g", + NUMA_USER, + "-d", + &data_dir.to_string_lossy(), + "-s", + "/usr/sbin/nologin", + "-c", + "Numa DNS service", + NUMA_USER, + ]) + .status() + .map_err(|e| format!("failed to run useradd: {}", e))?; + + // useradd exit 9 = "username already in use"; idempotent reinstall. + match status.code() { + Some(0) | Some(9) => Ok(()), + Some(code) => Err(format!("useradd {} failed (exit {})", NUMA_USER, code)), + None => Err(format!("useradd {} killed by signal", NUMA_USER)), + } +} + +#[cfg(target_os = "linux")] +fn chown_data_dir_to_numa_linux() -> Result<(), String> { + let dir = crate::data_dir(); + std::fs::create_dir_all(&dir) + .map_err(|e| format!("failed to create {}: {}", dir.display(), e))?; + let owner = format!("{0}:{0}", NUMA_USER); + let status = std::process::Command::new("chown") + .args(["-R", &owner, &dir.to_string_lossy()]) + .status() + .map_err(|e| format!("failed to run chown: {}", e))?; + if !status.success() { + return Err(format!( + "chown {} failed (exit {})", + dir.display(), + status.code().unwrap_or(-1) + )); + } + Ok(()) +} + #[cfg(target_os = "linux")] fn install_service_linux() -> Result<(), String> { + // Create the numa account and hand it ownership of data_dir before the + // first start — TLS-cert generation and state writes happen on the + // unit's first launch and need to land on a numa-owned tree. + ensure_numa_user_linux()?; + chown_data_dir_to_numa_linux()?; + let unit = include_str!("../numa.service"); let unit = replace_exe_path(unit)?; std::fs::write(SYSTEMD_UNIT, unit) -- 2.34.1 From 41aea1dd12b85382b40e4e345ace504153ad0948 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 18 Apr 2026 08:10:04 +0300 Subject: [PATCH 086/139] fix(linux): drop risky sandbox directives that break Rust network daemons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Integration test failed with exit 7 on curl to /health after a successful install — service started but never listened. The likely culprits are MemoryDenyWriteExecute (breaks jemalloc/some crypto), SystemCallFilter ~@privileged @resources (blocks setrlimit and friends tokio may use), and RestrictNamespaces/LockPersonality (occasional foot-guns). Pull them and keep a conservative hardening set that's well-tested with Rust network services: no-new-privs, protect-system/home, private tmp and devices, protect-kernel-*, restrict-realtime/suid/address-families. Layer the aggressive bits back in follow-up PRs once tested individually. --- numa.service | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/numa.service b/numa.service index 6894078..44e90c5 100644 --- a/numa.service +++ b/numa.service @@ -22,7 +22,9 @@ StateDirectoryMode=0750 ConfigurationDirectory=numa ConfigurationDirectoryMode=0755 -# Sandboxing +# Sandboxing — conservative set known to work with Rust network daemons. +# Aggressive hardening (MemoryDenyWriteExecute, SystemCallFilter, seccomp +# allow-lists) can be layered on once tested in isolation. NoNewPrivileges=true ProtectSystem=strict ProtectHome=true @@ -31,14 +33,8 @@ PrivateDevices=true ProtectKernelTunables=true ProtectKernelModules=true ProtectControlGroups=true -LockPersonality=true -MemoryDenyWriteExecute=true -RestrictNamespaces=true RestrictRealtime=true RestrictSUIDSGID=true -SystemCallArchitectures=native -SystemCallFilter=@system-service -SystemCallFilter=~@privileged @resources # AF_NETLINK for interface enumeration on network changes RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX AF_NETLINK -- 2.34.1 From 4f6159d9616bf485af38bacaf08ed98a5afe0aa5 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 18 Apr 2026 08:20:07 +0300 Subject: [PATCH 087/139] refactor(linux): switch to DynamicUser=yes, drop install-time user creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AUR installs never call `numa install` — PKGBUILD drops the unit straight into /usr/lib/systemd/system and the user runs `systemctl enable numa`. With User=numa the Rust installer's useradd code never fires there, breaking Arch out of the box. DynamicUser=yes sidesteps packaging entirely — systemd allocates a transient UID per start and remaps StateDirectory ownership (including legacy root-owned trees) automatically. Works on any modern systemd. Drops the ensure_numa_user_linux/chown helpers plus NUMA_USER; the unit file alone now captures the privilege-drop story. --- numa.service | 8 +++---- src/system_dns.rs | 60 ----------------------------------------------- 2 files changed, 4 insertions(+), 64 deletions(-) diff --git a/numa.service b/numa.service index 44e90c5..5380b83 100644 --- a/numa.service +++ b/numa.service @@ -9,14 +9,14 @@ ExecStart={{exe_path}} Restart=always RestartSec=2 -User=numa -Group=numa +# Transient system user per start; no PKGBUILD/sysusers setup required. +# systemd remaps the StateDirectory ownership to the dynamic UID on each +# launch, including legacy root-owned trees from pre-drop installs. +DynamicUser=yes AmbientCapabilities=CAP_NET_BIND_SERVICE CapabilityBoundingSet=CAP_NET_BIND_SERVICE -# StateDirectory maps to crate::data_dir() default on Linux (/var/lib/numa). -# systemd auto-creates + chowns on every start, fixing legacy root-owned trees. StateDirectory=numa StateDirectoryMode=0750 ConfigurationDirectory=numa diff --git a/src/system_dns.rs b/src/system_dns.rs index 7b4de42..b70b9d9 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -1664,68 +1664,8 @@ fn uninstall_linux() -> Result<(), String> { Ok(()) } -#[cfg(target_os = "linux")] -const NUMA_USER: &str = "numa"; - -#[cfg(target_os = "linux")] -fn ensure_numa_user_linux() -> Result<(), String> { - let _ = std::process::Command::new("groupadd") - .args(["-f", "-r", NUMA_USER]) - .status(); - - let data_dir = crate::data_dir(); - let status = std::process::Command::new("useradd") - .args([ - "-r", - "-g", - NUMA_USER, - "-d", - &data_dir.to_string_lossy(), - "-s", - "/usr/sbin/nologin", - "-c", - "Numa DNS service", - NUMA_USER, - ]) - .status() - .map_err(|e| format!("failed to run useradd: {}", e))?; - - // useradd exit 9 = "username already in use"; idempotent reinstall. - match status.code() { - Some(0) | Some(9) => Ok(()), - Some(code) => Err(format!("useradd {} failed (exit {})", NUMA_USER, code)), - None => Err(format!("useradd {} killed by signal", NUMA_USER)), - } -} - -#[cfg(target_os = "linux")] -fn chown_data_dir_to_numa_linux() -> Result<(), String> { - let dir = crate::data_dir(); - std::fs::create_dir_all(&dir) - .map_err(|e| format!("failed to create {}: {}", dir.display(), e))?; - let owner = format!("{0}:{0}", NUMA_USER); - let status = std::process::Command::new("chown") - .args(["-R", &owner, &dir.to_string_lossy()]) - .status() - .map_err(|e| format!("failed to run chown: {}", e))?; - if !status.success() { - return Err(format!( - "chown {} failed (exit {})", - dir.display(), - status.code().unwrap_or(-1) - )); - } - Ok(()) -} - #[cfg(target_os = "linux")] fn install_service_linux() -> Result<(), String> { - // Create the numa account and hand it ownership of data_dir before the - // first start — TLS-cert generation and state writes happen on the - // unit's first launch and need to land on a numa-owned tree. - ensure_numa_user_linux()?; - chown_data_dir_to_numa_linux()?; - let unit = include_str!("../numa.service"); let unit = replace_exe_path(unit)?; std::fs::write(SYSTEMD_UNIT, unit) -- 2.34.1 From dfeca53e21f1012da4a5cc1183dae85b54f796ad Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 18 Apr 2026 08:48:53 +0300 Subject: [PATCH 088/139] ci: dump journalctl + systemctl status on integration-linux failure --- .github/workflows/ci.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e116744..4bce7c2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -93,6 +93,17 @@ jobs: sudo ./target/release/numa uninstall sleep 1 ! curl -sf http://127.0.0.1:5380/health 2>/dev/null + - name: diagnostics on failure + if: failure() + run: | + echo "=== systemctl status numa ===" + sudo systemctl status numa --no-pager -l || true + echo "=== journalctl -u numa (last 200) ===" + sudo journalctl -u numa --no-pager -n 200 || true + echo "=== ss -tulnp on 53/80/443/853/5380 ===" + sudo ss -tulnp 2>/dev/null | grep -E ':(53|80|443|853|5380)\b' || true + echo "=== systemctl is-active systemd-resolved ===" + systemctl is-active systemd-resolved || true - name: cleanup if: always() run: | -- 2.34.1 From 7b9db9e889915cbdcf6394bb077d51d8bbf02ba5 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 18 Apr 2026 08:54:34 +0300 Subject: [PATCH 089/139] =?UTF-8?q?fix(linux):=20drop=20ProtectHome=3Dtrue?= =?UTF-8?q?=20=E2=80=94=20blocks=20exec=20when=20binary=20lives=20under=20?= =?UTF-8?q?/home?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Integration-linux journalctl showed status=203/EXEC: systemd couldn't exec /home/runner/work/numa/numa/target/release/numa because ProtectHome=yes makes /home invisible to the sandboxed process. My local Docker test passed because the binary was at /workspace, not /home. DynamicUser=yes already implies ProtectHome=read-only, which preserves exec access to binaries living under /home (cargo install, source builds, CI) while blocking writes to user $HOMEs. Keep that default rather than over-restricting. Follow-up worth tracking: install_service_linux could copy the binary to /usr/local/bin/numa the way Windows does at windows_service_exe_path, making the unit's ExecStart independent of where `numa install` was invoked from — then we could set ProtectHome=yes again. --- numa.service | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/numa.service b/numa.service index 5380b83..4794033 100644 --- a/numa.service +++ b/numa.service @@ -27,7 +27,10 @@ ConfigurationDirectoryMode=0755 # allow-lists) can be layered on once tested in isolation. NoNewPrivileges=true ProtectSystem=strict -ProtectHome=true +# DynamicUser= sets ProtectHome=read-only by default — leaves /home +# readable so systemd can exec binaries installed under it (cargo install, +# source builds), while blocking writes to user $HOMEs. Don't set =yes: +# that hides /home entirely and fails with status=203/EXEC. PrivateTmp=true PrivateDevices=true ProtectKernelTunables=true -- 2.34.1 From 3970a9f45c23d4c751a5ed1ff849610e51eee075 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 18 Apr 2026 11:51:32 +0300 Subject: [PATCH 090/139] fix(linux): copy binary to /usr/local/bin when source path isn't world-traversable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DynamicUser=yes' transient account can only traverse world-x directories. The CI binary at /home/runner/work/numa/numa/target/release/numa fails exec with EACCES because /home/runner is mode 0700; same applies to a build under /home//, ~/.cargo/bin, or any private $HOME tree. install_service_binary_linux now walks the binary's path. If every ancestor grants world-execute (Linuxbrew /home/linuxbrew is 0755, /usr/local/bin is fine, install.sh layout works), keep the source path so brew/distro upgrades propagate in place. Otherwise copy to /usr/local/bin/numa and reference that in the unit. Locally verified both branches in an Ubuntu 24.04 systemd container: - CI-like /home/runner (0700) → copies + service binds 5380 - Brew-like /home/linuxbrew (0755) → keeps source path + service binds 5380 --- src/system_dns.rs | 59 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/src/system_dns.rs b/src/system_dns.rs index b70b9d9..726cc1a 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -1664,10 +1664,65 @@ fn uninstall_linux() -> Result<(), String> { Ok(()) } +/// Fallback install location when current_exe() sits on a path the +/// dynamic user cannot traverse (e.g. `/home//` mode 0700). +#[cfg(target_os = "linux")] +fn linux_service_exe_path() -> std::path::PathBuf { + std::path::PathBuf::from("/usr/local/bin/numa") +} + +/// True iff every ancestor of `p` (excluding `/`) grants world-execute — +/// i.e. the `DynamicUser=yes` service account can traverse the path and +/// exec the binary without being in any group. Linuxbrew's +/// `/home/linuxbrew` is 0755 (traversable, keep brew's path, upgrades +/// via `brew` propagate). A build tree under `/home//` (0700) or +/// `~/.cargo/bin/` is not (copy to /usr/local/bin so systemd can reach it). +#[cfg(target_os = "linux")] +fn path_world_traversable_linux(p: &std::path::Path) -> bool { + use std::os::unix::fs::PermissionsExt; + let mut current = p; + while let Some(parent) = current.parent() { + if parent.as_os_str().is_empty() || parent == std::path::Path::new("/") { + break; + } + match std::fs::metadata(parent) { + Ok(m) if m.permissions().mode() & 0o001 != 0 => {} + _ => return false, + } + current = parent; + } + true +} + +#[cfg(target_os = "linux")] +fn install_service_binary_linux() -> Result { + let src = std::env::current_exe().map_err(|e| format!("current_exe(): {}", e))?; + if path_world_traversable_linux(&src) { + return Ok(src); + } + let dst = linux_service_exe_path(); + if src == dst { + return Ok(dst); + } + if let Some(parent) = dst.parent() { + std::fs::create_dir_all(parent) + .map_err(|e| format!("failed to create {}: {}", parent.display(), e))?; + } + std::fs::copy(&src, &dst).map_err(|e| { + format!( + "failed to copy {} -> {}: {}", + src.display(), + dst.display(), + e + ) + })?; + Ok(dst) +} + #[cfg(target_os = "linux")] fn install_service_linux() -> Result<(), String> { - let unit = include_str!("../numa.service"); - let unit = replace_exe_path(unit)?; + let exe = install_service_binary_linux()?; + let unit = include_str!("../numa.service").replace("{{exe_path}}", &exe.to_string_lossy()); std::fs::write(SYSTEMD_UNIT, unit) .map_err(|e| format!("failed to write {}: {}", SYSTEMD_UNIT, e))?; -- 2.34.1 From e19505aa952d9ff78d5ecd7e8edc52428401b292 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 18 Apr 2026 11:57:54 +0300 Subject: [PATCH 091/139] fix(linux): narrow replace_exe_path cfg to macos after Linux inlined the substitution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Linux install_service_linux now does the {{exe_path}} substitution inline because it uses the (potentially copied) binary path returned by install_service_binary_linux, not current_exe(). The shared replace_exe_path helper is dead on Linux — clippy -D warnings caught it. Narrow the function to macos and split the placeholder test: keep the "both templates contain {{exe_path}}" assertion as a cross-platform test (catches placeholder removal on either file), keep the substitution test gated to macos where the function lives. --- src/system_dns.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/system_dns.rs b/src/system_dns.rs index 726cc1a..60701e3 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -1416,7 +1416,7 @@ pub fn service_status() -> Result<(), String> { } } -#[cfg(any(target_os = "macos", target_os = "linux"))] +#[cfg(target_os = "macos")] fn replace_exe_path(service: &str) -> Result { let exe_path = std::env::current_exe().map_err(|e| format!("failed to get current exe: {}", e))?; @@ -2050,22 +2050,25 @@ Wireless LAN adapter Wi-Fi: } #[test] - #[cfg(any(target_os = "macos", target_os = "linux"))] - fn replace_exe_path_substitutes_template() { + fn install_templates_contain_exe_path_placeholder() { + // Both files are substituted at install time — plist via + // replace_exe_path on macOS, numa.service via inline .replace + // in install_service_linux. Catch placeholder removal early. let plist = include_str!("../com.numa.dns.plist"); let unit = include_str!("../numa.service"); - assert!(plist.contains("{{exe_path}}"), "plist missing placeholder"); assert!( unit.contains("{{exe_path}}"), "unit file missing placeholder" ); + } + #[test] + #[cfg(target_os = "macos")] + fn replace_exe_path_substitutes_template() { + let plist = include_str!("../com.numa.dns.plist"); let result = replace_exe_path(plist).expect("replace_exe_path failed for plist"); assert!(!result.contains("{{exe_path}}")); - - let result = replace_exe_path(unit).expect("replace_exe_path failed for unit"); - assert!(!result.contains("{{exe_path}}")); } #[test] -- 2.34.1 From 067195f2abd9444c34e1e85bed9104d03f0a0d42 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 18 Apr 2026 12:12:11 +0300 Subject: [PATCH 092/139] fix(linux): atomic binary copy + restart instead of start on re-install MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-install failed with ETXTBSY (Text file busy) because std::fs::copy can't overwrite a binary that's currently being executed by the running service. Switch to copy-then-rename: write the new binary to /usr/local/bin/numa.new, then rename over /usr/local/bin/numa. Rename swaps the path while the running process keeps the old inode alive, so DNS keeps serving from the previous binary until restart. Bump systemctl start to restart so the new binary actually loads on re-install (start is a no-op when the unit is already active, which would silently leave the old binary running). Locally verified the full CI sequence: install → curl → reinstall → curl → uninstall → curl-fails. All three assertions pass. --- src/system_dns.rs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/system_dns.rs b/src/system_dns.rs index 60701e3..5a7b999 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -1708,10 +1708,18 @@ fn install_service_binary_linux() -> Result { std::fs::create_dir_all(parent) .map_err(|e| format!("failed to create {}: {}", parent.display(), e))?; } - std::fs::copy(&src, &dst).map_err(|e| { + // Atomic replace via temp + rename. Plain copy fails with ETXTBSY when + // re-installing while the service is running the previous binary — + // rename swaps the path while the running process keeps the old inode. + let tmp = dst.with_extension("new"); + std::fs::copy(&src, &tmp).map_err(|e| { + format!("failed to copy {} -> {}: {}", src.display(), tmp.display(), e) + })?; + std::fs::rename(&tmp, &dst).map_err(|e| { + let _ = std::fs::remove_file(&tmp); format!( - "failed to copy {} -> {}: {}", - src.display(), + "failed to rename {} -> {}: {}", + tmp.display(), dst.display(), e ) @@ -1734,7 +1742,9 @@ fn install_service_linux() -> Result<(), String> { eprintln!(" warning: failed to configure system DNS: {}", e); } - run_systemctl(&["start", "numa"])?; + // restart, not start: on re-install the service is already running + // the previous binary; restart picks up the new one. + run_systemctl(&["restart", "numa"])?; eprintln!(" Service installed and started."); eprintln!(" Numa will auto-start on boot and restart if killed."); -- 2.34.1 From 763131478f21dd56708f680b72b1b96acc7acb23 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 18 Apr 2026 12:15:44 +0300 Subject: [PATCH 093/139] fmt: rustfmt format! macro split --- src/system_dns.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/system_dns.rs b/src/system_dns.rs index 5a7b999..fd16e8b 100644 --- a/src/system_dns.rs +++ b/src/system_dns.rs @@ -1713,7 +1713,12 @@ fn install_service_binary_linux() -> Result { // rename swaps the path while the running process keeps the old inode. let tmp = dst.with_extension("new"); std::fs::copy(&src, &tmp).map_err(|e| { - format!("failed to copy {} -> {}: {}", src.display(), tmp.display(), e) + format!( + "failed to copy {} -> {}: {}", + src.display(), + tmp.display(), + e + ) })?; std::fs::rename(&tmp, &dst).map_err(|e| { let _ = std::fs::remove_file(&tmp); -- 2.34.1 From be98a02e493cf2736197158cc4712e699819fa69 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 18 Apr 2026 19:52:06 +0300 Subject: [PATCH 094/139] feat(resolver): filter_aaaa for IPv4-only networks (#112) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When enabled, AAAA queries short-circuit to NODATA (NOERROR + empty answer) so Happy Eyeballs clients don't stall waiting on a v6 address they can't use. Also strips `ipv6hint` SvcParam from HTTPS/SVCB answers (RFC 9460) so Chrome ≥103, Firefox, and Safari don't bypass the AAAA filter via the HTTPS record path. Local data is preserved: overrides, zones, the .numa proxy, and the blocklist sinkhole keep whatever v6 addresses they configure — the filter only kicks in on the cache/forward/recursive path. NODATA is correct per RFC 2308 here; NXDOMAIN would incorrectly imply the name doesn't exist for A queries either. Off by default. Opt in via `filter_aaaa = true` under `[server]`. --- numa.toml | 10 +++ src/config.rs | 18 +++++ src/ctx.rs | 155 ++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + src/serve.rs | 1 + src/svcb.rs | 177 ++++++++++++++++++++++++++++++++++++++++++++++++ src/testutil.rs | 1 + 7 files changed, 363 insertions(+) create mode 100644 src/svcb.rs diff --git a/numa.toml b/numa.toml index ebb9720..c25654a 100644 --- a/numa.toml +++ b/numa.toml @@ -8,6 +8,16 @@ api_port = 5380 # %PROGRAMDATA%\numa on windows. Override for # containerized deploys or tests that can't # write to the system path. +# filter_aaaa = true # on IPv4-only networks, answer AAAA queries with + # NODATA (NOERROR + empty answer) so Happy Eyeballs + # clients don't wait on a v6 attempt that can't + # succeed. Also strips `ipv6hint` from HTTPS/SVCB + # records (RFC 9460) so modern browsers (Chrome + # ≥103, Firefox, Safari) don't bypass the AAAA + # filter via SVCB hints. Local zones, overrides, + # and the .numa proxy are NOT filtered — you can + # still configure v6 records for local services. + # Default: false. # [upstream] # mode = "forward" # "forward" (default) — relay to upstream diff --git a/src/config.rs b/src/config.rs index 90d1ba3..309344b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -93,6 +93,12 @@ pub struct ServerConfig { /// Defaults to `crate::data_dir()` (platform-specific system path) if unset. #[serde(default)] pub data_dir: Option, + /// Synthesize NODATA (NOERROR + empty answer) for AAAA queries, and + /// strip `ipv6hint` from HTTPS/SVCB responses (RFC 9460). For IPv4-only + /// networks where Happy Eyeballs fallback adds latency. Local zones, + /// overrides, and the service proxy are not affected. Default false. + #[serde(default)] + pub filter_aaaa: bool, } impl Default for ServerConfig { @@ -102,6 +108,7 @@ impl Default for ServerConfig { api_port: default_api_port(), api_bind_addr: default_api_bind_addr(), data_dir: None, + filter_aaaa: false, } } } @@ -580,6 +587,17 @@ mod tests { assert!(config.lan.enabled); } + #[test] + fn filter_aaaa_defaults_false() { + assert!(!ServerConfig::default().filter_aaaa); + } + + #[test] + fn filter_aaaa_parses_from_server_section() { + let config: Config = toml::from_str("[server]\nfilter_aaaa = true").unwrap(); + assert!(config.server.filter_aaaa); + } + #[test] fn custom_bind_addrs_parse() { let toml = r#" diff --git a/src/ctx.rs b/src/ctx.rs index 3a3a58a..b3f7ae2 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -77,6 +77,10 @@ pub struct ServerCtx { pub ca_pem: Option, pub mobile_enabled: bool, pub mobile_port: u16, + /// When true, AAAA queries short-circuit with NODATA (NOERROR + empty + /// answer) instead of hitting cache/forwarding/upstream. Local data + /// (overrides, zones, .numa proxy, blocklist sinkhole) is unaffected. + pub filter_aaaa: bool, } /// Transport-agnostic DNS resolution. Runs the full pipeline (overrides, blocklist, @@ -172,6 +176,13 @@ pub async fn resolve_query( 60, )); (resp, QueryPath::Blocked, DnssecStatus::Indeterminate) + } else if qtype == QueryType::AAAA && ctx.filter_aaaa { + // RFC 2308 NODATA: NOERROR with empty answer section. Prevents + // Happy Eyeballs clients from waiting on an AAAA they'll never use + // on IPv4-only networks. NXDOMAIN would be wrong (it'd imply the + // name doesn't exist for A either). + let resp = DnsPacket::response_from(&query, ResultCode::NOERROR); + (resp, QueryPath::Local, DnssecStatus::Indeterminate) } else { let cached = ctx.cache.read().unwrap().lookup_with_status(&qname, qtype); if let Some((cached, cached_dnssec, freshness)) = cached { @@ -334,6 +345,13 @@ pub async fn resolve_query( strip_dnssec_records(&mut response); } + // filter_aaaa: also strip ipv6hint from HTTPS/SVCB answers so modern + // browsers (Chrome ≥103 etc.) don't receive v6 address hints via the + // HTTPS record path that bypasses AAAA entirely. + if ctx.filter_aaaa { + strip_https_ipv6_hints(&mut response); + } + // Echo EDNS back if client sent it if query.edns.is_some() { response.edns = Some(crate::packet::EdnsOpt { @@ -491,6 +509,29 @@ fn strip_dnssec_records(pkt: &mut DnsPacket) { pkt.resources.retain(|r| !is_dnssec_record(r)); } +/// HTTPS RR type code (RFC 9460). Numa stores HTTPS/SVCB records as +/// `DnsRecord::UNKNOWN { qtype: 65, .. }` since it doesn't have a +/// dedicated variant. +const HTTPS_TYPE: u16 = 65; + +fn strip_https_ipv6_hints(pkt: &mut DnsPacket) { + let rewrite = |rec: &mut DnsRecord| { + if let DnsRecord::UNKNOWN { + qtype: HTTPS_TYPE, + data, + .. + } = rec + { + if let Some(new_data) = crate::svcb::strip_ipv6hint(data) { + *data = new_data; + } + } + }; + pkt.answers.iter_mut().for_each(rewrite); + pkt.authorities.iter_mut().for_each(rewrite); + pkt.resources.iter_mut().for_each(rewrite); +} + fn is_special_use_domain(qname: &str) -> bool { if qname.ends_with(".in-addr.arpa") { // RFC 6303: private + loopback + link-local reverse DNS @@ -1187,6 +1228,120 @@ mod tests { } } + #[tokio::test] + async fn pipeline_filter_aaaa_returns_nodata() { + let mut ctx = crate::testutil::test_ctx().await; + ctx.filter_aaaa = true; + let ctx = Arc::new(ctx); + + let (resp, path) = resolve_in_test(&ctx, "example.com", QueryType::AAAA).await; + assert_eq!(path, QueryPath::Local); + assert_eq!(resp.header.rescode, ResultCode::NOERROR); + assert!(resp.answers.is_empty(), "AAAA must be filtered to NODATA"); + } + + #[tokio::test] + async fn pipeline_filter_aaaa_leaves_a_queries_alone() { + let mut upstream_resp = DnsPacket::new(); + upstream_resp.header.response = true; + upstream_resp.header.rescode = ResultCode::NOERROR; + upstream_resp.answers.push(DnsRecord::A { + domain: "example.com".to_string(), + addr: Ipv4Addr::new(93, 184, 216, 34), + ttl: 300, + }); + let upstream_addr = crate::testutil::mock_upstream(upstream_resp).await; + + let mut ctx = crate::testutil::test_ctx().await; + ctx.filter_aaaa = true; + ctx.upstream_pool + .lock() + .unwrap() + .set_primary(vec![Upstream::Udp(upstream_addr)]); + let ctx = Arc::new(ctx); + + let (resp, path) = resolve_in_test(&ctx, "example.com", QueryType::A).await; + assert_eq!(path, QueryPath::Upstream); + assert_eq!(resp.answers.len(), 1); + } + + #[tokio::test] + async fn pipeline_filter_aaaa_respects_override() { + let mut ctx = crate::testutil::test_ctx().await; + ctx.filter_aaaa = true; + ctx.overrides + .write() + .unwrap() + .insert("v6.test", "2001:db8::1", 60, None) + .unwrap(); + let ctx = Arc::new(ctx); + + let (resp, path) = resolve_in_test(&ctx, "v6.test", QueryType::AAAA).await; + assert_eq!(path, QueryPath::Overridden); + assert_eq!(resp.answers.len(), 1, "override must win over filter"); + } + + #[tokio::test] + async fn pipeline_filter_aaaa_strips_ipv6hint_from_https() { + // Build an HTTPS record (type 65) with ipv6hint (key 6). Cache it, + // then query with filter_aaaa on — the returned rdata must have + // ipv6hint removed. + let mut rdata = Vec::new(); + rdata.extend_from_slice(&1u16.to_be_bytes()); // priority + rdata.push(0); // empty target (".") + // alpn = ["h3"] + rdata.extend_from_slice(&1u16.to_be_bytes()); + rdata.extend_from_slice(&3u16.to_be_bytes()); + rdata.extend_from_slice(&[0x02, b'h', b'3']); + // ipv6hint = [2606:4700::1] + rdata.extend_from_slice(&6u16.to_be_bytes()); + rdata.extend_from_slice(&16u16.to_be_bytes()); + rdata.extend_from_slice(&[ + 0x26, 0x06, 0x47, 0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01, + ]); + + let mut pkt = DnsPacket::new(); + pkt.header.response = true; + pkt.header.rescode = ResultCode::NOERROR; + pkt.questions.push(crate::question::DnsQuestion { + name: "hints.test".to_string(), + qtype: QueryType::HTTPS, + }); + pkt.answers.push(DnsRecord::UNKNOWN { + domain: "hints.test".to_string(), + qtype: 65, + data: rdata.clone(), + ttl: 300, + }); + + let mut ctx = crate::testutil::test_ctx().await; + ctx.filter_aaaa = true; + ctx.cache + .write() + .unwrap() + .insert("hints.test", QueryType::HTTPS, &pkt); + let ctx = Arc::new(ctx); + + let (resp, path) = resolve_in_test(&ctx, "hints.test", QueryType::HTTPS).await; + assert_eq!(path, QueryPath::Cached); + assert_eq!(resp.answers.len(), 1); + match &resp.answers[0] { + DnsRecord::UNKNOWN { data, .. } => { + assert!( + data.len() < rdata.len(), + "ipv6hint (20 bytes) must be removed" + ); + // Bytes for key=6 must not appear at any 4-byte boundary in the + // params section — cheap structural check. + assert!( + !data.windows(4).any(|w| w == [0, 6, 0, 16]), + "ipv6hint TLV header must be absent" + ); + } + other => panic!("expected UNKNOWN record, got {:?}", other), + } + } + #[tokio::test] async fn pipeline_blocklist_sinkhole() { let ctx = crate::testutil::test_ctx().await; diff --git a/src/lib.rs b/src/lib.rs index a16568b..bce8833 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,6 +25,7 @@ pub mod service_store; pub mod setup_phone; pub mod srtt; pub mod stats; +pub mod svcb; pub mod system_dns; pub mod tls; pub mod wire; diff --git a/src/serve.rs b/src/serve.rs index 1a9a764..8e85b32 100644 --- a/src/serve.rs +++ b/src/serve.rs @@ -236,6 +236,7 @@ pub async fn run(config_path: String) -> crate::Result<()> { ca_pem, mobile_enabled: config.mobile.enabled, mobile_port: config.mobile.port, + filter_aaaa: config.server.filter_aaaa, }); let zone_count: usize = ctx.zone_map.values().map(|m| m.len()).sum(); diff --git a/src/svcb.rs b/src/svcb.rs new file mode 100644 index 0000000..2228443 --- /dev/null +++ b/src/svcb.rs @@ -0,0 +1,177 @@ +//! Minimal SVCB/HTTPS (RFC 9460) RDATA parser — just enough to strip +//! the `ipv6hint` SvcParam. Used by the `filter_aaaa` feature so +//! HTTPS-record-aware clients (Chrome ≥103, Firefox, Safari) don't +//! receive v6 address hints on IPv4-only networks. + +/// SvcParamKey = 6 (RFC 9460 §14.3.2). +const IPV6_HINT_KEY: u16 = 6; + +/// Strip the `ipv6hint` SvcParam from an HTTPS/SVCB RDATA blob. +/// +/// Returns `Some(new_rdata)` if `ipv6hint` was present and removed. +/// Returns `None` if the record had no `ipv6hint`, or if the RDATA +/// couldn't be parsed — in both cases the caller should keep the +/// original bytes untouched. +/// +/// SVCB RDATA (RFC 9460 §2.2): +/// SvcPriority (u16) +/// TargetName (uncompressed DNS name — labels terminated by 0 octet) +/// SvcParams (series of {u16 key, u16 len, opaque[len] value}, sorted by key) +pub fn strip_ipv6hint(rdata: &[u8]) -> Option> { + if rdata.len() < 2 { + return None; + } + let mut pos = 2; + + // TargetName — uncompressed per RFC 9460 §2.2 + loop { + let len = *rdata.get(pos)? as usize; + pos += 1; + if len == 0 { + break; + } + if len & 0xC0 != 0 { + // Pointer: forbidden in SVCB but defend against a broken upstream. + return None; + } + pos = pos.checked_add(len)?; + if pos > rdata.len() { + return None; + } + } + + // Scan params once to decide whether we need to rebuild. + let params_start = pos; + let mut scan = pos; + let mut has_ipv6hint = false; + while scan < rdata.len() { + if scan + 4 > rdata.len() { + return None; + } + let key = u16::from_be_bytes([rdata[scan], rdata[scan + 1]]); + let vlen = u16::from_be_bytes([rdata[scan + 2], rdata[scan + 3]]) as usize; + let end = scan.checked_add(4)?.checked_add(vlen)?; + if end > rdata.len() { + return None; + } + if key == IPV6_HINT_KEY { + has_ipv6hint = true; + } + scan = end; + } + if scan != rdata.len() || !has_ipv6hint { + return None; + } + + // Rebuild without ipv6hint, preserving param order (RFC 9460 requires + // ascending key order, which we preserve by filtering in place). + let mut out = Vec::with_capacity(rdata.len()); + out.extend_from_slice(&rdata[..params_start]); + let mut pos = params_start; + while pos < rdata.len() { + let key = u16::from_be_bytes([rdata[pos], rdata[pos + 1]]); + let vlen = u16::from_be_bytes([rdata[pos + 2], rdata[pos + 3]]) as usize; + let end = pos + 4 + vlen; + if key != IPV6_HINT_KEY { + out.extend_from_slice(&rdata[pos..end]); + } + pos = end; + } + Some(out) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Build an SVCB RDATA blob from a priority, target labels, and + /// (key, value) param pairs. Used for constructing test vectors. + fn build(priority: u16, target: &[&str], params: &[(u16, Vec)]) -> Vec { + let mut out = Vec::new(); + out.extend_from_slice(&priority.to_be_bytes()); + for label in target { + out.push(label.len() as u8); + out.extend_from_slice(label.as_bytes()); + } + out.push(0); + for (key, value) in params { + out.extend_from_slice(&key.to_be_bytes()); + out.extend_from_slice(&(value.len() as u16).to_be_bytes()); + out.extend_from_slice(value); + } + out + } + + fn alpn_h3() -> (u16, Vec) { + // alpn = ["h3"]: one length-prefixed ALPN id + (1, vec![0x02, b'h', b'3']) + } + + fn ipv4hint_single() -> (u16, Vec) { + (4, vec![93, 184, 216, 34]) + } + + fn ipv6hint_single() -> (u16, Vec) { + // 2606:4700::1 + ( + 6, + vec![ + 0x26, 0x06, 0x47, 0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01, + ], + ) + } + + #[test] + fn strips_ipv6hint_and_keeps_other_params() { + let rdata = build(1, &[], &[alpn_h3(), ipv4hint_single(), ipv6hint_single()]); + let stripped = strip_ipv6hint(&rdata).expect("ipv6hint present → stripped"); + let expected = build(1, &[], &[alpn_h3(), ipv4hint_single()]); + assert_eq!(stripped, expected); + } + + #[test] + fn no_ipv6hint_returns_none() { + let rdata = build(1, &[], &[alpn_h3(), ipv4hint_single()]); + assert!(strip_ipv6hint(&rdata).is_none()); + } + + #[test] + fn alias_mode_empty_params_returns_none() { + let rdata = build(0, &["example", "com"], &[]); + assert!(strip_ipv6hint(&rdata).is_none()); + } + + #[test] + fn only_ipv6hint_yields_empty_param_section() { + let rdata = build(1, &[], &[ipv6hint_single()]); + let stripped = strip_ipv6hint(&rdata).expect("ipv6hint present → stripped"); + let expected = build(1, &[], &[]); + assert_eq!(stripped, expected); + } + + #[test] + fn preserves_target_name() { + let rdata = build(1, &["svc", "example", "net"], &[ipv6hint_single()]); + let stripped = strip_ipv6hint(&rdata).unwrap(); + assert!(stripped.starts_with(&[0x00, 0x01])); // priority + assert_eq!(&stripped[2..6], b"\x03svc"); + } + + #[test] + fn truncated_rdata_returns_none() { + // Priority only, no target terminator. + assert!(strip_ipv6hint(&[0, 1, 3, b'c', b'o', b'm']).is_none()); + } + + #[test] + fn empty_input_returns_none() { + assert!(strip_ipv6hint(&[]).is_none()); + } + + #[test] + fn param_length_overflow_returns_none() { + // key=6, length=0xFFFF but value is short — malformed. + let rdata = vec![0, 1, 0, 0, 6, 0xFF, 0xFF, 0, 1, 2]; + assert!(strip_ipv6hint(&rdata).is_none()); + } +} diff --git a/src/testutil.rs b/src/testutil.rs index 8687625..fab861b 100644 --- a/src/testutil.rs +++ b/src/testutil.rs @@ -63,6 +63,7 @@ pub async fn test_ctx() -> ServerCtx { ca_pem: None, mobile_enabled: false, mobile_port: 8765, + filter_aaaa: false, } } -- 2.34.1 From b02b607fb908781c6584665bbe3e6c477afb2058 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 18 Apr 2026 20:07:24 +0300 Subject: [PATCH 095/139] ci(linux): assert numa daemon does not run as root Locks in the invariant this branch establishes: a regression that reverts to User=root would otherwise ship green. --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4bce7c2..1e015ab 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -87,6 +87,9 @@ jobs: sleep 2 curl -sf http://127.0.0.1:5380/health dig @127.0.0.1 example.com +short +timeout=5 | grep -q '.' + user=$(ps -o user= -p "$(systemctl show -p MainPID --value numa)" | tr -d ' ') + echo "numa running as: $user" + test "$user" != "root" sudo ./target/release/numa install sleep 2 curl -sf http://127.0.0.1:5380/health -- 2.34.1 From fb41a6f8b59b846d2413812ab823a40735b38130 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sat, 18 Apr 2026 22:00:54 +0300 Subject: [PATCH 096/139] test(linux): systemd service install verification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three scenarios CI cannot run: every advertised port is functional (DNS resolves, TLS chain validates against numa's CA, HTTP/API respond), CA fingerprint survives upgrade from pre-drop layout, binary staging fallback from a 0700 source dir. Self-bootstraps a privileged systemd-as-PID1 container — no dependency on long-lived test containers. MainPID user assertion retries until comm=numa to avoid a race where systemctl reports active while MainPID still points at a transitional process. --- tests/docker/install-systemd.sh | 288 ++++++++++++++++++++++++++++++++ 1 file changed, 288 insertions(+) create mode 100755 tests/docker/install-systemd.sh diff --git a/tests/docker/install-systemd.sh b/tests/docker/install-systemd.sh new file mode 100755 index 0000000..aa9c31a --- /dev/null +++ b/tests/docker/install-systemd.sh @@ -0,0 +1,288 @@ +#!/usr/bin/env bash +# +# Systemd service install verification for the DynamicUser-based Linux +# service unit. Stands up a privileged ubuntu:24.04 container with systemd +# as PID 1, builds numa inside, runs three scenarios that CI does not: +# +# A. Fresh install — every advertised port is not just bound but +# functional (DNS resolves on :53, TLS handshake validates against +# numa's CA on :853/:443, HTTP responds on :80, API on :5380). +# B. Upgrade from pre-drop layout (root-owned /var/lib/numa) preserves +# the CA fingerprint — users' browser-installed CA trust survives. +# C. Install from a 0700 source directory stages the binary under +# /usr/local/bin/numa and the service starts from there. +# +# First run is slow (~5-10 min): image pull + apt + cold cargo build. +# Subsequent runs reuse cached docker volumes for cargo + target (~30s). +# +# Requirements: docker +# Usage: ./tests/docker/install-systemd.sh + +set -u +set -o pipefail + +GREEN="\033[32m"; RED="\033[31m"; RESET="\033[0m" + +pass() { printf " ${GREEN}PASS${RESET}: %s\n" "$*"; } +fail() { printf " ${RED}FAIL${RESET}: %s\n" "$*"; FAIL=1; } + +# ============================================================ +# Mode B: running inside the systemd container — run scenarios +# ============================================================ +if [ "${NUMA_INSIDE:-}" = "1" ]; then + set +e # assertions report pass/fail, don't abort + FAIL=0 + NUMA=/work/target/release/numa + + reset_state() { + "$NUMA" uninstall >/dev/null 2>&1 || true + systemctl reset-failed numa 2>/dev/null || true + rm -rf /var/lib/numa /var/lib/private/numa /etc/numa /home/builder /usr/local/bin/numa + systemctl daemon-reload 2>/dev/null || true + } + + main_pid_user() { + local pid + pid=$(systemctl show -p MainPID --value numa) + [ "$pid" != "0" ] || { echo ""; return; } + ps -o user= -p "$pid" 2>/dev/null | tr -d ' ' + } + + # MainPID + user briefly stabilize after a fresh restart. Retry so we + # don't race the moment systemd flips the service to "active" vs when + # the forked numa process actually owns MainPID. + assert_nonroot() { + local pid user comm n=0 + while [ $n -lt 20 ]; do + pid=$(systemctl show -p MainPID --value numa) + if [ "$pid" != "0" ]; then + comm=$(ps -o comm= -p "$pid" 2>/dev/null | tr -d ' ') + user=$(ps -o user= -p "$pid" 2>/dev/null | tr -d ' ') + if [ "$comm" = "numa" ]; then + if [ "$user" = "root" ]; then + fail "daemon runs as root (expected transient UID)" + else + pass "daemon runs as $user (non-root)" + fi + return + fi + fi + sleep 0.2 + n=$((n + 1)) + done + fail "numa MainPID did not settle (last: pid=${pid:-?} comm=${comm:-?} user=${user:-?})" + } + + # Functional DNS check: just "port 53 bound" isn't enough — systemd-resolved + # listens on 127.0.0.53 and would satisfy a bind test. Retries for ~15s + # to tolerate cold-start upstream / blocklist warmup. + assert_dns_works() { + local n=0 + while [ $n -lt 15 ]; do + if dig @127.0.0.1 -p 53 example.com +short +timeout=2 +tries=1 2>/dev/null \ + | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'; then + pass "DNS resolves on :53 (A record returned)" + return + fi + sleep 1 + n=$((n + 1)) + done + fail "DNS did not return an A record on :53 within 15s" + } + + # TLS handshake: cert must validate against numa's CA when connecting + # to a .numa SNI. Catches port-not-bound, wrong cert, missing CA file. + assert_tls_handshake() { + local port=$1 sni=${2:-numa.numa} out + if out=$(openssl s_client -connect "127.0.0.1:${port}" \ + -servername "$sni" \ + -CAfile /var/lib/numa/ca.pem \ + -verify_return_error &1); then + if echo "$out" | grep -q 'Verify return code: 0 (ok)'; then + pass "TLS handshake + cert chain verified on :${port}" + else + fail "TLS handshake on :${port} did not report 'Verify return code: 0'" + fi + else + fail "openssl s_client failed connecting to :${port}" + fi + } + + assert_http_responds() { + local code + code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 3 http://127.0.0.1/ || echo 000) + if [ "$code" != "000" ]; then + pass "HTTP responds on :80 (status $code)" + else + fail "HTTP :80 connection failed" + fi + } + + assert_api_healthy() { + if curl -sf --max-time 3 http://127.0.0.1:5380/health >/dev/null; then + pass "API /health OK on :5380" + else + fail "API /health failed on :5380" + fi + } + + ca_fingerprint() { + openssl x509 -in /var/lib/numa/ca.pem -noout -fingerprint -sha256 2>/dev/null \ + | sed 's/.*=//' + } + + wait_active() { + local n=0 + while [ $n -lt 20 ]; do + systemctl is-active --quiet numa && return 0 + sleep 0.5 + n=$((n + 1)) + done + fail "service did not become active within 10s" + systemctl status numa --no-pager -l 2>&1 | head -20 || true + return 1 + } + + # ---- Scenario A ---- + printf "\n=== Scenario A: fresh install — every advertised port is functional ===\n" + reset_state + "$NUMA" install >/tmp/installA.log 2>&1 || { fail "install failed"; tail -20 /tmp/installA.log; } + wait_active || true + assert_nonroot + assert_dns_works + assert_tls_handshake 853 + assert_tls_handshake 443 + assert_http_responds + assert_api_healthy + + # ---- Scenario B ---- + # Pre-drop installs left /var/lib/numa as a plain root-owned tree. + # Flattening the current DynamicUser layout back into that shape + # simulates the upgrade path without needing an actual old binary. + printf "\n=== Scenario B: CA fingerprint survives upgrade from pre-drop layout ===\n" + fp_before=$(ca_fingerprint) + if [ -z "$fp_before" ]; then + fail "could not read initial CA fingerprint (skipping scenario B)" + else + echo " CA fingerprint before: $fp_before" + "$NUMA" uninstall >/dev/null 2>&1 || true + tmp=$(mktemp -d) + cp -a /var/lib/private/numa/. "$tmp"/ 2>/dev/null || true + rm -rf /var/lib/numa /var/lib/private/numa + mv "$tmp" /var/lib/numa + chown -R root:root /var/lib/numa + chmod 755 /var/lib/numa + [ -f /var/lib/numa/ca.pem ] || fail "ca.pem missing from seeded legacy tree" + + "$NUMA" install >/tmp/installB.log 2>&1 || { fail "upgrade install failed"; tail -20 /tmp/installB.log; } + wait_active || true + assert_nonroot + fp_after=$(ca_fingerprint) + if [ -z "$fp_after" ]; then + fail "could not read CA fingerprint after upgrade" + elif [ "$fp_before" = "$fp_after" ]; then + pass "CA fingerprint preserved across upgrade" + else + fail "CA fingerprint changed: before=$fp_before after=$fp_after" + fi + assert_dns_works + fi + + # ---- Scenario C ---- + printf "\n=== Scenario C: install from unreachable source stages binary to /usr/local/bin ===\n" + reset_state + mkdir -p /home/builder + chmod 700 /home/builder + cp "$NUMA" /home/builder/numa + chmod 755 /home/builder/numa + /home/builder/numa install >/tmp/installC.log 2>&1 || { fail "install failed"; tail -20 /tmp/installC.log; } + wait_active || true + if [ -x /usr/local/bin/numa ]; then + pass "binary staged to /usr/local/bin/numa" + else + fail "/usr/local/bin/numa missing after install from 0700 source" + fi + exec_line=$(grep '^ExecStart=' /etc/systemd/system/numa.service 2>/dev/null || echo "ExecStart=") + if echo "$exec_line" | grep -q '/usr/local/bin/numa'; then + pass "unit ExecStart points to staged path" + else + fail "unit ExecStart wrong: $exec_line" + fi + assert_nonroot + assert_dns_works + + reset_state + rm -rf /home/builder + echo + if [ "$FAIL" -eq 0 ]; then + printf "${GREEN}── all scenarios passed ──${RESET}\n" + exit 0 + else + printf "${RED}── some scenarios failed ──${RESET}\n" + exit 1 + fi +fi + +# ============================================================ +# Mode A: host-side bootstrap +# ============================================================ +set -e +cd "$(dirname "$0")/../.." + +IMAGE=numa-install-systemd:local +CONTAINER="numa-install-systemd-$$" +trap 'docker rm -f "$CONTAINER" >/dev/null 2>&1 || true' EXIT + +echo "── building systemd-in-container image (cached after first run) ──" +docker build --quiet -t "$IMAGE" -f - . <<'DOCKERFILE' >/dev/null +FROM ubuntu:24.04 +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update -qq && apt-get install -y -qq \ + systemd systemd-sysv systemd-resolved \ + ca-certificates curl build-essential \ + pkg-config libssl-dev cmake make perl \ + dnsutils iproute2 openssl \ + && rm -rf /var/lib/apt/lists/* \ + && for u in dev-hugepages.mount sys-fs-fuse-connections.mount \ + systemd-logind.service getty.target console-getty.service; do \ + systemctl mask $u; \ + done +STOPSIGNAL SIGRTMIN+3 +CMD ["/lib/systemd/systemd"] +DOCKERFILE + +echo "── starting systemd container ──" +docker run -d --name "$CONTAINER" \ + --privileged --cgroupns=host \ + --tmpfs /run --tmpfs /run/lock --tmpfs /tmp:exec \ + -v "$PWD:/src:ro" \ + -v numa-install-systemd-cargo:/root/.cargo \ + -v numa-install-systemd-work:/work \ + "$IMAGE" >/dev/null + +# Wait for systemd to be up +for _ in $(seq 1 30); do + state=$(docker exec "$CONTAINER" systemctl is-system-running 2>&1 || true) + case "$state" in running|degraded) break ;; esac + sleep 0.5 +done + +echo "── copying source into /work (writable) ──" +docker exec "$CONTAINER" bash -c ' +mkdir -p /work +tar -C /src --exclude=./target --exclude=./.git --exclude=./.claude -cf - . | tar -C /work -xf - +' + +echo "── rustup + cargo build --release --locked ──" +docker exec "$CONTAINER" bash -c ' +set -e +if ! command -v cargo &>/dev/null; then + curl -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --quiet +fi +. "$HOME/.cargo/env" +cd /work +cargo build --release --locked 2>&1 | tail -5 +' + +echo "── running scenarios ──" +docker exec -e NUMA_INSIDE=1 "$CONTAINER" bash /src/tests/docker/install-systemd.sh -- 2.34.1 From 8014ebac9e9fdbf32e693c5f94ccca940177b64f Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 19 Apr 2026 05:52:29 +0300 Subject: [PATCH 097/139] test(integration): add Suite 7 for filter_aaaa + SUITES env filter Suite 7 exercises the full pipeline end-to-end: A resolves, AAAA returns NODATA, local [[zones]] AAAA bypasses the filter, and HTTPS ipv6hint is stripped from a real cloudflare.com response. A second config run with the flag unset guards against network-failure false-positives. SUITES=N (comma list) runs a subset, e.g. `SUITES=7 bash tests/integration.sh` skips suites 1-6 for fast iteration. --- tests/integration.sh | 158 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 157 insertions(+), 1 deletion(-) diff --git a/tests/integration.sh b/tests/integration.sh index c70ec59..81bd28d 100755 --- a/tests/integration.sh +++ b/tests/integration.sh @@ -1,7 +1,10 @@ #!/usr/bin/env bash # Integration test suite for Numa # Runs a test instance on port 5354, validates all features, exits with status. -# Usage: ./tests/integration.sh [release|debug] +# Usage: +# ./tests/integration.sh [release|debug] # all suites +# SUITES=7 ./tests/integration.sh # only Suite 7 +# SUITES=1,3,7 ./tests/integration.sh # Suites 1, 3, and 7 set -euo pipefail @@ -14,6 +17,14 @@ LOG="/tmp/numa-integration-test.log" PASSED=0 FAILED=0 +# Suite filter: empty runs all; comma list runs a subset. +SUITES="${SUITES:-}" +should_run_suite() { + [ -z "$SUITES" ] && return 0 + case ",$SUITES," in *",$1,"*) return 0;; esac + return 1 +} + # Colors GREEN="\033[32m" RED="\033[31m" @@ -166,6 +177,7 @@ CONF } # ---- Suite 1: Recursive mode + DNSSEC ---- +if should_run_suite 1; then echo "" echo "╔══════════════════════════════════════════╗" echo "║ Suite 1: Recursive + DNSSEC + Blocking ║" @@ -234,7 +246,10 @@ kill "$NUMA_PID" 2>/dev/null || true wait "$NUMA_PID" 2>/dev/null || true sleep 1 +fi # end Suite 1 + # ---- Suite 2: Forward mode (backward compat) ---- +if should_run_suite 2; then echo "" echo "╔══════════════════════════════════════════╗" echo "║ Suite 2: Forward (DoH) + Blocking ║" @@ -261,7 +276,10 @@ enabled = true enabled = false " +fi # end Suite 2 + # ---- Suite 3: Forward UDP (plain, no DoH) ---- +if should_run_suite 3; then echo "" echo "╔══════════════════════════════════════════╗" echo "║ Suite 3: Forward (UDP) + No Blocking ║" @@ -307,7 +325,10 @@ kill "$NUMA_PID" 2>/dev/null || true wait "$NUMA_PID" 2>/dev/null || true sleep 1 +fi # end Suite 3 + # ---- Suite 4: Local zones + Overrides API ---- +if should_run_suite 4; then echo "" echo "╔══════════════════════════════════════════╗" echo "║ Suite 4: Local Zones + Overrides API ║" @@ -416,7 +437,10 @@ kill "$NUMA_PID" 2>/dev/null || true wait "$NUMA_PID" 2>/dev/null || true sleep 1 +fi # end Suite 4 + # ---- Suite 5: DNS-over-TLS (RFC 7858) ---- +if should_run_suite 5; then echo "" echo "╔══════════════════════════════════════════╗" echo "║ Suite 5: DNS-over-TLS (RFC 7858) ║" @@ -538,7 +562,10 @@ CONF fi sleep 1 +fi # end Suite 5 + # ---- Suite 6: Proxy + DoT coexistence ---- +if should_run_suite 6; then echo "" echo "╔══════════════════════════════════════════╗" echo "║ Suite 6: Proxy + DoT Coexistence ║" @@ -698,6 +725,135 @@ CONF rm -rf "$NUMA_DATA" fi +fi # end Suite 6 + +# ---- Suite 7: filter_aaaa (IPv4-only networks) ---- +if should_run_suite 7; then +echo "" +echo "╔══════════════════════════════════════════╗" +echo "║ Suite 7: filter_aaaa ║" +echo "╚══════════════════════════════════════════╝" + +# Config A — filter on, with a local AAAA zone to prove local data bypass. +cat > "$CONFIG" << 'CONF' +[server] +bind_addr = "127.0.0.1:5354" +api_port = 5381 +filter_aaaa = true + +[upstream] +mode = "forward" +address = "9.9.9.9" +port = 53 + +[cache] +max_entries = 10000 + +[blocking] +enabled = false + +[proxy] +enabled = false + +[[zones]] +domain = "v6.test" +record_type = "AAAA" +value = "2001:db8::1" +ttl = 60 +CONF + +RUST_LOG=info "$BINARY" "$CONFIG" > "$LOG" 2>&1 & +NUMA_PID=$! +sleep 3 + +DIG="dig @127.0.0.1 -p $PORT +time=5 +tries=1" + +echo "" +echo "=== filter_aaaa = true ===" + +# A queries must be untouched. +check "A record resolves under filter_aaaa" \ + "." \ + "$($DIG google.com A +short | head -1)" + +# AAAA must be NOERROR (NODATA), not NXDOMAIN, not SERVFAIL. +check "AAAA returns NOERROR (not NXDOMAIN)" \ + "status: NOERROR" \ + "$($DIG google.com AAAA 2>&1 | grep 'status:')" + +check "AAAA returns zero answers (NODATA shape)" \ + "ANSWER: 0" \ + "$($DIG google.com AAAA 2>&1 | grep -oE 'ANSWER: [0-9]+' | head -1)" + +# Local zone AAAA must survive the filter (PR claim: local data bypasses). +check "Local [[zones]] AAAA bypasses filter" \ + "2001:db8::1" \ + "$($DIG v6.test AAAA +short)" + +# HTTPS RR: ipv6hint (SvcParamKey 6) must be stripped. Query as `type65` +# because dig 9.10.6 (macOS) misparses `HTTPS` as a domain name; `type65` +# works on both 9.10.6 and 9.18. Assert on the raw rdata hex (RFC 3597 +# generic format), since dig 9.10.6 doesn't pretty-print HTTPS params. +# cloudflare.com's ipv6hint values sit under the 2606:4700 prefix — +# checking that `26064700` is absent from the rdata hex is a precise, +# upstream-stable signal that the TLV was stripped. +HTTPS_OUT=$($DIG cloudflare.com type65 2>&1) +if echo "$HTTPS_OUT" | grep -qE "cloudflare\.com\..*IN[[:space:]]+TYPE65"; then + HTTPS_HEX=$(echo "$HTTPS_OUT" | grep -A5 "IN[[:space:]]*TYPE65" | tr -d " \t\n") + if echo "$HTTPS_HEX" | grep -qi "26064700"; then + check "HTTPS ipv6hint stripped (2606:4700 absent from rdata)" "absent" "present" + else + check "HTTPS ipv6hint stripped (2606:4700 absent from rdata)" "absent" "absent" + fi +else + # Upstream didn't return an HTTPS record — skip rather than false-pass. + printf " ${DIM}~ HTTPS ipv6hint stripped (skipped: no HTTPS RR returned by upstream)${RESET}\n" +fi + +kill "$NUMA_PID" 2>/dev/null || true +wait "$NUMA_PID" 2>/dev/null || true +sleep 1 + +# Config B — filter off. Regression guard: prove AAAA answers come back +# when the flag isn't set, so a network failure in Config A can't silently +# pass as "filter working". +cat > "$CONFIG" << 'CONF' +[server] +bind_addr = "127.0.0.1:5354" +api_port = 5381 + +[upstream] +mode = "forward" +address = "9.9.9.9" +port = 53 + +[cache] +max_entries = 10000 + +[blocking] +enabled = false + +[proxy] +enabled = false +CONF + +RUST_LOG=info "$BINARY" "$CONFIG" > "$LOG" 2>&1 & +NUMA_PID=$! +sleep 3 + +echo "" +echo "=== filter_aaaa unset (regression guard) ===" + +check "AAAA returns real answers with filter off" \ + ":" \ + "$($DIG google.com AAAA +short | head -1)" + +kill "$NUMA_PID" 2>/dev/null || true +wait "$NUMA_PID" 2>/dev/null || true +sleep 1 + +fi # end Suite 7 + # Summary echo "" TOTAL=$((PASSED + FAILED)) -- 2.34.1 From 22dd3cd2222f7d19994125f61800c5eb3af672b5 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 19 Apr 2026 05:52:37 +0300 Subject: [PATCH 098/139] fix(resolver): skip ipv6hint strip for DO-bit clients Modifying HTTPS rdata invalidates any accompanying RRSIG, so a DNSSEC- validating downstream would reject the response as Bogus. Gate the strip on !client_do, matching the existing DNSSEC-records strip. Adds a regression test that catches the gate being removed: builds a query with EDNS DO=1, asserts the HTTPS rdata round-trips untouched. --- src/ctx.rs | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index b3f7ae2..0b7dd80 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -347,8 +347,10 @@ pub async fn resolve_query( // filter_aaaa: also strip ipv6hint from HTTPS/SVCB answers so modern // browsers (Chrome ≥103 etc.) don't receive v6 address hints via the - // HTTPS record path that bypasses AAAA entirely. - if ctx.filter_aaaa { + // HTTPS record path that bypasses AAAA entirely. Gated on !client_do + // because modifying rdata invalidates any accompanying RRSIG — a DO-bit + // validator downstream would reject the response as Bogus. + if ctx.filter_aaaa && !client_do { strip_https_ipv6_hints(&mut response); } @@ -1342,6 +1344,71 @@ mod tests { } } + #[tokio::test] + async fn pipeline_filter_aaaa_preserves_ipv6hint_for_dnssec_clients() { + // Regression guard for the DO-bit gate in resolve_query: modifying + // HTTPS rdata invalidates any accompanying RRSIG, so a DO=1 client + // must receive the record untouched even when filter_aaaa is on. + let mut rdata = Vec::new(); + rdata.extend_from_slice(&1u16.to_be_bytes()); + rdata.push(0); + rdata.extend_from_slice(&6u16.to_be_bytes()); + rdata.extend_from_slice(&16u16.to_be_bytes()); + rdata.extend_from_slice(&[ + 0x26, 0x06, 0x47, 0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01, + ]); + + let mut pkt = DnsPacket::new(); + pkt.header.response = true; + pkt.header.rescode = ResultCode::NOERROR; + pkt.questions.push(crate::question::DnsQuestion { + name: "hints.test".to_string(), + qtype: QueryType::HTTPS, + }); + pkt.answers.push(DnsRecord::UNKNOWN { + domain: "hints.test".to_string(), + qtype: 65, + data: rdata.clone(), + ttl: 300, + }); + + let mut ctx = crate::testutil::test_ctx().await; + ctx.filter_aaaa = true; + ctx.cache + .write() + .unwrap() + .insert("hints.test", QueryType::HTTPS, &pkt); + let ctx = Arc::new(ctx); + + // Build a query with EDNS DO bit set — can't use resolve_in_test + // because it constructs a plain query without EDNS. + let mut query = DnsPacket::query(0xBEEF, "hints.test", QueryType::HTTPS); + query.edns = Some(crate::packet::EdnsOpt { + do_bit: true, + ..Default::default() + }); + let mut buf = BytePacketBuffer::new(); + query.write(&mut buf).unwrap(); + let raw = &buf.buf[..buf.pos]; + let src: SocketAddr = "127.0.0.1:1234".parse().unwrap(); + + let (resp_buf, _) = resolve_query(query, raw, src, &ctx, Transport::Udp) + .await + .unwrap(); + let mut resp_parse_buf = BytePacketBuffer::from_bytes(resp_buf.filled()); + let resp = DnsPacket::from_buffer(&mut resp_parse_buf).unwrap(); + + match &resp.answers[0] { + DnsRecord::UNKNOWN { data, .. } => { + assert_eq!( + data, &rdata, + "ipv6hint must be preserved for DO-bit clients" + ); + } + other => panic!("expected UNKNOWN record, got {:?}", other), + } + } + #[tokio::test] async fn pipeline_blocklist_sinkhole() { let ctx = crate::testutil::test_ctx().await; -- 2.34.1 From 61ea2e510d5a5f7b4c9e375de375c04073512abd Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 19 Apr 2026 05:58:47 +0300 Subject: [PATCH 099/139] refactor: dedupe HTTPS_TYPE, record-walk, and test rdata builder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Drop `const HTTPS_TYPE: u16 = 65;` in favor of `QueryType::HTTPS.to_num()` at the single call site — avoids a fresh magic number alongside the existing enum mapping in question.rs. - Add `DnsPacket::for_each_record_mut` so `strip_https_ipv6_hints` stops hand-rolling the answers/authorities/resources walk; future section rewrites go through the same helper. - Promote the SVCB test-rdata builder from `svcb::tests` to module scope as `pub(crate) #[cfg(test)] fn build_rdata`, and reuse it in the two pipeline tests in ctx.rs — kills ~20 lines of byte-fiddling and keeps one RDATA-construction code path. --- src/ctx.rs | 70 +++++++++++++++++++++------------------------------ src/packet.rs | 8 ++++++ src/svcb.rs | 56 +++++++++++++++++++++++------------------ 3 files changed, 68 insertions(+), 66 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index 0b7dd80..0dcef51 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -511,27 +511,17 @@ fn strip_dnssec_records(pkt: &mut DnsPacket) { pkt.resources.retain(|r| !is_dnssec_record(r)); } -/// HTTPS RR type code (RFC 9460). Numa stores HTTPS/SVCB records as -/// `DnsRecord::UNKNOWN { qtype: 65, .. }` since it doesn't have a -/// dedicated variant. -const HTTPS_TYPE: u16 = 65; - fn strip_https_ipv6_hints(pkt: &mut DnsPacket) { - let rewrite = |rec: &mut DnsRecord| { - if let DnsRecord::UNKNOWN { - qtype: HTTPS_TYPE, - data, - .. - } = rec - { - if let Some(new_data) = crate::svcb::strip_ipv6hint(data) { - *data = new_data; + let https_qtype = QueryType::HTTPS.to_num(); + pkt.for_each_record_mut(|rec| { + if let DnsRecord::UNKNOWN { qtype, data, .. } = rec { + if *qtype == https_qtype { + if let Some(new_data) = crate::svcb::strip_ipv6hint(data) { + *data = new_data; + } } } - }; - pkt.answers.iter_mut().for_each(rewrite); - pkt.authorities.iter_mut().for_each(rewrite); - pkt.resources.iter_mut().for_each(rewrite); + }); } fn is_special_use_domain(qname: &str) -> bool { @@ -1285,22 +1275,20 @@ mod tests { #[tokio::test] async fn pipeline_filter_aaaa_strips_ipv6hint_from_https() { - // Build an HTTPS record (type 65) with ipv6hint (key 6). Cache it, + // Build an HTTPS record (type 65) with alpn + ipv6hint, cache it, // then query with filter_aaaa on — the returned rdata must have - // ipv6hint removed. - let mut rdata = Vec::new(); - rdata.extend_from_slice(&1u16.to_be_bytes()); // priority - rdata.push(0); // empty target (".") - // alpn = ["h3"] - rdata.extend_from_slice(&1u16.to_be_bytes()); - rdata.extend_from_slice(&3u16.to_be_bytes()); - rdata.extend_from_slice(&[0x02, b'h', b'3']); - // ipv6hint = [2606:4700::1] - rdata.extend_from_slice(&6u16.to_be_bytes()); - rdata.extend_from_slice(&16u16.to_be_bytes()); - rdata.extend_from_slice(&[ - 0x26, 0x06, 0x47, 0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01, - ]); + // ipv6hint (20 bytes) removed. + let rdata = crate::svcb::build_rdata( + 1, + &[], + &[ + (1, vec![0x02, b'h', b'3']), + ( + 6, + vec![0x26, 0x06, 0x47, 0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01], + ), + ], + ); let mut pkt = DnsPacket::new(); pkt.header.response = true; @@ -1349,14 +1337,14 @@ mod tests { // Regression guard for the DO-bit gate in resolve_query: modifying // HTTPS rdata invalidates any accompanying RRSIG, so a DO=1 client // must receive the record untouched even when filter_aaaa is on. - let mut rdata = Vec::new(); - rdata.extend_from_slice(&1u16.to_be_bytes()); - rdata.push(0); - rdata.extend_from_slice(&6u16.to_be_bytes()); - rdata.extend_from_slice(&16u16.to_be_bytes()); - rdata.extend_from_slice(&[ - 0x26, 0x06, 0x47, 0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01, - ]); + let rdata = crate::svcb::build_rdata( + 1, + &[], + &[( + 6, + vec![0x26, 0x06, 0x47, 0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01], + )], + ); let mut pkt = DnsPacket::new(); pkt.header.response = true; diff --git a/src/packet.rs b/src/packet.rs index ba9e30a..a621c13 100644 --- a/src/packet.rs +++ b/src/packet.rs @@ -85,6 +85,14 @@ impl DnsPacket { + self.edns.as_ref().map_or(0, |e| e.options.capacity()) } + /// Apply `f` to every record in the three RR sections (answers, + /// authorities, resources). Does not touch questions or edns. + pub fn for_each_record_mut(&mut self, mut f: impl FnMut(&mut DnsRecord)) { + self.answers.iter_mut().for_each(&mut f); + self.authorities.iter_mut().for_each(&mut f); + self.resources.iter_mut().for_each(&mut f); + } + pub fn response_from(query: &DnsPacket, rescode: crate::header::ResultCode) -> DnsPacket { let mut resp = DnsPacket::new(); resp.header.id = query.header.id; diff --git a/src/svcb.rs b/src/svcb.rs index 2228443..444b063 100644 --- a/src/svcb.rs +++ b/src/svcb.rs @@ -80,28 +80,34 @@ pub fn strip_ipv6hint(rdata: &[u8]) -> Option> { Some(out) } +/// Build an SVCB RDATA blob from a priority, target labels, and +/// (key, value) param pairs. Shared by `svcb` unit tests and `ctx` +/// pipeline tests that need to seed the cache with a synthetic HTTPS RR. +#[cfg(test)] +pub(crate) fn build_rdata( + priority: u16, + target: &[&str], + params: &[(u16, Vec)], +) -> Vec { + let mut out = Vec::new(); + out.extend_from_slice(&priority.to_be_bytes()); + for label in target { + out.push(label.len() as u8); + out.extend_from_slice(label.as_bytes()); + } + out.push(0); + for (key, value) in params { + out.extend_from_slice(&key.to_be_bytes()); + out.extend_from_slice(&(value.len() as u16).to_be_bytes()); + out.extend_from_slice(value); + } + out +} + #[cfg(test)] mod tests { use super::*; - /// Build an SVCB RDATA blob from a priority, target labels, and - /// (key, value) param pairs. Used for constructing test vectors. - fn build(priority: u16, target: &[&str], params: &[(u16, Vec)]) -> Vec { - let mut out = Vec::new(); - out.extend_from_slice(&priority.to_be_bytes()); - for label in target { - out.push(label.len() as u8); - out.extend_from_slice(label.as_bytes()); - } - out.push(0); - for (key, value) in params { - out.extend_from_slice(&key.to_be_bytes()); - out.extend_from_slice(&(value.len() as u16).to_be_bytes()); - out.extend_from_slice(value); - } - out - } - fn alpn_h3() -> (u16, Vec) { // alpn = ["h3"]: one length-prefixed ALPN id (1, vec![0x02, b'h', b'3']) @@ -123,35 +129,35 @@ mod tests { #[test] fn strips_ipv6hint_and_keeps_other_params() { - let rdata = build(1, &[], &[alpn_h3(), ipv4hint_single(), ipv6hint_single()]); + let rdata = build_rdata(1, &[], &[alpn_h3(), ipv4hint_single(), ipv6hint_single()]); let stripped = strip_ipv6hint(&rdata).expect("ipv6hint present → stripped"); - let expected = build(1, &[], &[alpn_h3(), ipv4hint_single()]); + let expected = build_rdata(1, &[], &[alpn_h3(), ipv4hint_single()]); assert_eq!(stripped, expected); } #[test] fn no_ipv6hint_returns_none() { - let rdata = build(1, &[], &[alpn_h3(), ipv4hint_single()]); + let rdata = build_rdata(1, &[], &[alpn_h3(), ipv4hint_single()]); assert!(strip_ipv6hint(&rdata).is_none()); } #[test] fn alias_mode_empty_params_returns_none() { - let rdata = build(0, &["example", "com"], &[]); + let rdata = build_rdata(0, &["example", "com"], &[]); assert!(strip_ipv6hint(&rdata).is_none()); } #[test] fn only_ipv6hint_yields_empty_param_section() { - let rdata = build(1, &[], &[ipv6hint_single()]); + let rdata = build_rdata(1, &[], &[ipv6hint_single()]); let stripped = strip_ipv6hint(&rdata).expect("ipv6hint present → stripped"); - let expected = build(1, &[], &[]); + let expected = build_rdata(1, &[], &[]); assert_eq!(stripped, expected); } #[test] fn preserves_target_name() { - let rdata = build(1, &["svc", "example", "net"], &[ipv6hint_single()]); + let rdata = build_rdata(1, &["svc", "example", "net"], &[ipv6hint_single()]); let stripped = strip_ipv6hint(&rdata).unwrap(); assert!(stripped.starts_with(&[0x00, 0x01])); // priority assert_eq!(&stripped[2..6], b"\x03svc"); -- 2.34.1 From d6bb9a0f01f778f22bc03f7305a03377ea0abf24 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 19 Apr 2026 06:24:54 +0300 Subject: [PATCH 100/139] fmt: rustfmt vec literal wrapping + signature collapse --- src/ctx.rs | 8 ++++++-- src/svcb.rs | 6 +----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index 0dcef51..eeeb71f 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -1285,7 +1285,9 @@ mod tests { (1, vec![0x02, b'h', b'3']), ( 6, - vec![0x26, 0x06, 0x47, 0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01], + vec![ + 0x26, 0x06, 0x47, 0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01, + ], ), ], ); @@ -1342,7 +1344,9 @@ mod tests { &[], &[( 6, - vec![0x26, 0x06, 0x47, 0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01], + vec![ + 0x26, 0x06, 0x47, 0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01, + ], )], ); diff --git a/src/svcb.rs b/src/svcb.rs index 444b063..ef65d04 100644 --- a/src/svcb.rs +++ b/src/svcb.rs @@ -84,11 +84,7 @@ pub fn strip_ipv6hint(rdata: &[u8]) -> Option> { /// (key, value) param pairs. Shared by `svcb` unit tests and `ctx` /// pipeline tests that need to seed the cache with a synthetic HTTPS RR. #[cfg(test)] -pub(crate) fn build_rdata( - priority: u16, - target: &[&str], - params: &[(u16, Vec)], -) -> Vec { +pub(crate) fn build_rdata(priority: u16, target: &[&str], params: &[(u16, Vec)]) -> Vec { let mut out = Vec::new(); out.extend_from_slice(&priority.to_be_bytes()); for label in target { -- 2.34.1 From 5e85b147b97826314451af83f65161e40375830d Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 19 Apr 2026 06:52:30 +0300 Subject: [PATCH 101/139] feat(resolver): apply ipv6hint strip to SVCB (type 64) too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HTTPS (65) and SVCB (64) share the RDATA wire format, so the existing parser already handles both — only the call site was HTTPS-only. Widen the qtype check and extend the existing pipeline test with a second query for SVCB. --- src/ctx.rs | 72 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 23 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index eeeb71f..0ba33c8 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -351,7 +351,7 @@ pub async fn resolve_query( // because modifying rdata invalidates any accompanying RRSIG — a DO-bit // validator downstream would reject the response as Bogus. if ctx.filter_aaaa && !client_do { - strip_https_ipv6_hints(&mut response); + strip_svcb_ipv6_hints(&mut response); } // Echo EDNS back if client sent it @@ -511,11 +511,16 @@ fn strip_dnssec_records(pkt: &mut DnsPacket) { pkt.resources.retain(|r| !is_dnssec_record(r)); } -fn strip_https_ipv6_hints(pkt: &mut DnsPacket) { +/// SVCB and HTTPS share the same RDATA wire format (RFC 9460), so the +/// ipv6hint strip applies to both. SVCB has no `QueryType` variant — it +/// arrives as `UNKNOWN { qtype: 64, .. }`. +const SVCB_QTYPE: u16 = 64; + +fn strip_svcb_ipv6_hints(pkt: &mut DnsPacket) { let https_qtype = QueryType::HTTPS.to_num(); pkt.for_each_record_mut(|rec| { if let DnsRecord::UNKNOWN { qtype, data, .. } = rec { - if *qtype == https_qtype { + if *qtype == https_qtype || *qtype == SVCB_QTYPE { if let Some(new_data) = crate::svcb::strip_ipv6hint(data) { *data = new_data; } @@ -1274,10 +1279,12 @@ mod tests { } #[tokio::test] - async fn pipeline_filter_aaaa_strips_ipv6hint_from_https() { - // Build an HTTPS record (type 65) with alpn + ipv6hint, cache it, - // then query with filter_aaaa on — the returned rdata must have - // ipv6hint (20 bytes) removed. + async fn pipeline_filter_aaaa_strips_ipv6hint_from_https_and_svcb() { + // HTTPS (type 65) and SVCB (type 64) share the same RDATA wire + // format (RFC 9460); the filter must strip ipv6hint from both. + // Build one HTTPS record with alpn + ipv6hint, then re-key it as + // SVCB and assert the returned rdata has the 20-byte hint removed + // in both cases. let rdata = crate::svcb::build_rdata( 1, &[], @@ -1306,31 +1313,50 @@ mod tests { ttl: 300, }); + // Seed an SVCB record (type 64) under a different name — same wire + // format as HTTPS, must get the same treatment. + let mut svcb_pkt = pkt.clone(); + svcb_pkt.questions[0].name = "svc.test".to_string(); + svcb_pkt.questions[0].qtype = QueryType::UNKNOWN(64); + if let DnsRecord::UNKNOWN { domain, qtype, .. } = &mut svcb_pkt.answers[0] { + *domain = "svc.test".to_string(); + *qtype = 64; + } + let mut ctx = crate::testutil::test_ctx().await; ctx.filter_aaaa = true; ctx.cache .write() .unwrap() .insert("hints.test", QueryType::HTTPS, &pkt); + ctx.cache + .write() + .unwrap() + .insert("svc.test", QueryType::UNKNOWN(64), &svcb_pkt); let ctx = Arc::new(ctx); - let (resp, path) = resolve_in_test(&ctx, "hints.test", QueryType::HTTPS).await; - assert_eq!(path, QueryPath::Cached); - assert_eq!(resp.answers.len(), 1); - match &resp.answers[0] { - DnsRecord::UNKNOWN { data, .. } => { - assert!( - data.len() < rdata.len(), - "ipv6hint (20 bytes) must be removed" - ); - // Bytes for key=6 must not appear at any 4-byte boundary in the - // params section — cheap structural check. - assert!( - !data.windows(4).any(|w| w == [0, 6, 0, 16]), - "ipv6hint TLV header must be absent" - ); + for (name, qtype, label) in [ + ("hints.test", QueryType::HTTPS, "HTTPS"), + ("svc.test", QueryType::UNKNOWN(64), "SVCB"), + ] { + let (resp, path) = resolve_in_test(&ctx, name, qtype).await; + assert_eq!(path, QueryPath::Cached, "{label}"); + assert_eq!(resp.answers.len(), 1, "{label}"); + match &resp.answers[0] { + DnsRecord::UNKNOWN { data, .. } => { + assert!( + data.len() < rdata.len(), + "{label}: ipv6hint (20 bytes) must be removed" + ); + // Bytes for key=6 must not appear at any 4-byte boundary in the + // params section — cheap structural check. + assert!( + !data.windows(4).any(|w| w == [0, 6, 0, 16]), + "{label}: ipv6hint TLV header must be absent" + ); + } + other => panic!("{label}: expected UNKNOWN record, got {other:?}"), } - other => panic!("expected UNKNOWN record, got {:?}", other), } } -- 2.34.1 From f9e996ae78c644d6bda63341070107601b6d78fa Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 19 Apr 2026 06:53:47 +0300 Subject: [PATCH 102/139] fmt: drop redundant comments per house style --- src/ctx.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index 0ba33c8..23b1014 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -511,9 +511,6 @@ fn strip_dnssec_records(pkt: &mut DnsPacket) { pkt.resources.retain(|r| !is_dnssec_record(r)); } -/// SVCB and HTTPS share the same RDATA wire format (RFC 9460), so the -/// ipv6hint strip applies to both. SVCB has no `QueryType` variant — it -/// arrives as `UNKNOWN { qtype: 64, .. }`. const SVCB_QTYPE: u16 = 64; fn strip_svcb_ipv6_hints(pkt: &mut DnsPacket) { @@ -1280,11 +1277,6 @@ mod tests { #[tokio::test] async fn pipeline_filter_aaaa_strips_ipv6hint_from_https_and_svcb() { - // HTTPS (type 65) and SVCB (type 64) share the same RDATA wire - // format (RFC 9460); the filter must strip ipv6hint from both. - // Build one HTTPS record with alpn + ipv6hint, then re-key it as - // SVCB and assert the returned rdata has the 20-byte hint removed - // in both cases. let rdata = crate::svcb::build_rdata( 1, &[], @@ -1313,8 +1305,6 @@ mod tests { ttl: 300, }); - // Seed an SVCB record (type 64) under a different name — same wire - // format as HTTPS, must get the same treatment. let mut svcb_pkt = pkt.clone(); svcb_pkt.questions[0].name = "svc.test".to_string(); svcb_pkt.questions[0].qtype = QueryType::UNKNOWN(64); -- 2.34.1 From 24610ae3fe759de22dff1fbcdc164fed937f52e9 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 19 Apr 2026 07:49:35 +0300 Subject: [PATCH 103/139] feat(question): add SVCB, LOC, NAPTR variants to QueryType MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Logs were printing UNKNOWN(64), UNKNOWN(29), UNKNOWN(35) for SVCB, LOC, and NAPTR — three RR types that have been registered for years and show up in the wild (notably SVCB via RFC 9462 DDR clients querying _dns.resolver.arpa). Adds the variants and replaces the SVCB_QTYPE u16 const introduced in #119 with QueryType::SVCB.to_num(), matching the HTTPS path. Closes #114. --- src/ctx.rs | 11 +++++------ src/question.rs | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index 23b1014..71e81c9 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -511,13 +511,12 @@ fn strip_dnssec_records(pkt: &mut DnsPacket) { pkt.resources.retain(|r| !is_dnssec_record(r)); } -const SVCB_QTYPE: u16 = 64; - fn strip_svcb_ipv6_hints(pkt: &mut DnsPacket) { let https_qtype = QueryType::HTTPS.to_num(); + let svcb_qtype = QueryType::SVCB.to_num(); pkt.for_each_record_mut(|rec| { if let DnsRecord::UNKNOWN { qtype, data, .. } = rec { - if *qtype == https_qtype || *qtype == SVCB_QTYPE { + if *qtype == https_qtype || *qtype == svcb_qtype { if let Some(new_data) = crate::svcb::strip_ipv6hint(data) { *data = new_data; } @@ -1307,7 +1306,7 @@ mod tests { let mut svcb_pkt = pkt.clone(); svcb_pkt.questions[0].name = "svc.test".to_string(); - svcb_pkt.questions[0].qtype = QueryType::UNKNOWN(64); + svcb_pkt.questions[0].qtype = QueryType::SVCB; if let DnsRecord::UNKNOWN { domain, qtype, .. } = &mut svcb_pkt.answers[0] { *domain = "svc.test".to_string(); *qtype = 64; @@ -1322,12 +1321,12 @@ mod tests { ctx.cache .write() .unwrap() - .insert("svc.test", QueryType::UNKNOWN(64), &svcb_pkt); + .insert("svc.test", QueryType::SVCB, &svcb_pkt); let ctx = Arc::new(ctx); for (name, qtype, label) in [ ("hints.test", QueryType::HTTPS, "HTTPS"), - ("svc.test", QueryType::UNKNOWN(64), "SVCB"), + ("svc.test", QueryType::SVCB, "SVCB"), ] { let (resp, path) = resolve_in_test(&ctx, name, qtype).await; assert_eq!(path, QueryPath::Cached, "{label}"); diff --git a/src/question.rs b/src/question.rs index dc23dd1..9523339 100644 --- a/src/question.rs +++ b/src/question.rs @@ -12,13 +12,16 @@ pub enum QueryType { MX, // 15 TXT, // 16 AAAA, // 28 + LOC, // 29 SRV, // 33 + NAPTR, // 35 DS, // 43 RRSIG, // 46 NSEC, // 47 DNSKEY, // 48 NSEC3, // 50 OPT, // 41 (EDNS0 pseudo-type) + SVCB, // 64 HTTPS, // 65 } @@ -34,13 +37,16 @@ impl QueryType { QueryType::MX => 15, QueryType::TXT => 16, QueryType::AAAA => 28, + QueryType::LOC => 29, QueryType::SRV => 33, + QueryType::NAPTR => 35, QueryType::OPT => 41, QueryType::DS => 43, QueryType::RRSIG => 46, QueryType::NSEC => 47, QueryType::DNSKEY => 48, QueryType::NSEC3 => 50, + QueryType::SVCB => 64, QueryType::HTTPS => 65, } } @@ -55,13 +61,16 @@ impl QueryType { 15 => QueryType::MX, 16 => QueryType::TXT, 28 => QueryType::AAAA, + 29 => QueryType::LOC, 33 => QueryType::SRV, + 35 => QueryType::NAPTR, 41 => QueryType::OPT, 43 => QueryType::DS, 46 => QueryType::RRSIG, 47 => QueryType::NSEC, 48 => QueryType::DNSKEY, 50 => QueryType::NSEC3, + 64 => QueryType::SVCB, 65 => QueryType::HTTPS, _ => QueryType::UNKNOWN(num), } @@ -77,13 +86,16 @@ impl QueryType { QueryType::MX => "MX", QueryType::TXT => "TXT", QueryType::AAAA => "AAAA", + QueryType::LOC => "LOC", QueryType::SRV => "SRV", + QueryType::NAPTR => "NAPTR", QueryType::OPT => "OPT", QueryType::DS => "DS", QueryType::RRSIG => "RRSIG", QueryType::NSEC => "NSEC", QueryType::DNSKEY => "DNSKEY", QueryType::NSEC3 => "NSEC3", + QueryType::SVCB => "SVCB", QueryType::HTTPS => "HTTPS", QueryType::UNKNOWN(_) => "UNKNOWN", } @@ -99,12 +111,15 @@ impl QueryType { "MX" => Some(QueryType::MX), "TXT" => Some(QueryType::TXT), "AAAA" => Some(QueryType::AAAA), + "LOC" => Some(QueryType::LOC), "SRV" => Some(QueryType::SRV), + "NAPTR" => Some(QueryType::NAPTR), "DS" => Some(QueryType::DS), "RRSIG" => Some(QueryType::RRSIG), "DNSKEY" => Some(QueryType::DNSKEY), "NSEC" => Some(QueryType::NSEC), "NSEC3" => Some(QueryType::NSEC3), + "SVCB" => Some(QueryType::SVCB), "HTTPS" => Some(QueryType::HTTPS), _ => None, } -- 2.34.1 From 5725f94ff34684c9ebb7681dafc091e9940f51ee Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Sun, 19 Apr 2026 08:01:18 +0300 Subject: [PATCH 104/139] refactor(question): collapse QueryType impls behind define_qtypes! macro Adding a record type used to require 5 edits across the file (enum variant, to_num, from_num, as_str, parse_str). The macro takes a single (variant, num, str) tuple per type and generates the enum plus all four methods. UNKNOWN(u16) stays hand-coded since it carries data and can't sit in the table. src/question.rs: 156 lines -> 92 lines, no behavior change. --- src/question.rs | 179 ++++++++++++++++-------------------------------- 1 file changed, 58 insertions(+), 121 deletions(-) diff --git a/src/question.rs b/src/question.rs index 9523339..fbb3fef 100644 --- a/src/question.rs +++ b/src/question.rs @@ -1,129 +1,66 @@ use crate::buffer::BytePacketBuffer; use crate::Result; -#[derive(PartialEq, Eq, Debug, Clone, Hash, Copy)] -pub enum QueryType { - UNKNOWN(u16), - A, // 1 - NS, // 2 - CNAME, // 5 - SOA, // 6 - PTR, // 12 - MX, // 15 - TXT, // 16 - AAAA, // 28 - LOC, // 29 - SRV, // 33 - NAPTR, // 35 - DS, // 43 - RRSIG, // 46 - NSEC, // 47 - DNSKEY, // 48 - NSEC3, // 50 - OPT, // 41 (EDNS0 pseudo-type) - SVCB, // 64 - HTTPS, // 65 +macro_rules! define_qtypes { + ( $( $variant:ident = $num:literal, $str:literal ),* $(,)? ) => { + #[derive(PartialEq, Eq, Debug, Clone, Hash, Copy)] + pub enum QueryType { + UNKNOWN(u16), + $( $variant, )* + } + + impl QueryType { + pub fn to_num(&self) -> u16 { + match *self { + QueryType::UNKNOWN(x) => x, + $( QueryType::$variant => $num, )* + } + } + + pub fn from_num(num: u16) -> QueryType { + match num { + $( $num => QueryType::$variant, )* + _ => QueryType::UNKNOWN(num), + } + } + + pub fn as_str(&self) -> &'static str { + match self { + QueryType::UNKNOWN(_) => "UNKNOWN", + $( QueryType::$variant => $str, )* + } + } + + pub fn parse_str(s: &str) -> Option { + match s.to_ascii_uppercase().as_str() { + $( $str => Some(QueryType::$variant), )* + _ => None, + } + } + } + }; } -impl QueryType { - pub fn to_num(&self) -> u16 { - match *self { - QueryType::UNKNOWN(x) => x, - QueryType::A => 1, - QueryType::NS => 2, - QueryType::CNAME => 5, - QueryType::SOA => 6, - QueryType::PTR => 12, - QueryType::MX => 15, - QueryType::TXT => 16, - QueryType::AAAA => 28, - QueryType::LOC => 29, - QueryType::SRV => 33, - QueryType::NAPTR => 35, - QueryType::OPT => 41, - QueryType::DS => 43, - QueryType::RRSIG => 46, - QueryType::NSEC => 47, - QueryType::DNSKEY => 48, - QueryType::NSEC3 => 50, - QueryType::SVCB => 64, - QueryType::HTTPS => 65, - } - } - - pub fn from_num(num: u16) -> QueryType { - match num { - 1 => QueryType::A, - 2 => QueryType::NS, - 5 => QueryType::CNAME, - 6 => QueryType::SOA, - 12 => QueryType::PTR, - 15 => QueryType::MX, - 16 => QueryType::TXT, - 28 => QueryType::AAAA, - 29 => QueryType::LOC, - 33 => QueryType::SRV, - 35 => QueryType::NAPTR, - 41 => QueryType::OPT, - 43 => QueryType::DS, - 46 => QueryType::RRSIG, - 47 => QueryType::NSEC, - 48 => QueryType::DNSKEY, - 50 => QueryType::NSEC3, - 64 => QueryType::SVCB, - 65 => QueryType::HTTPS, - _ => QueryType::UNKNOWN(num), - } - } - - pub fn as_str(&self) -> &'static str { - match self { - QueryType::A => "A", - QueryType::NS => "NS", - QueryType::CNAME => "CNAME", - QueryType::SOA => "SOA", - QueryType::PTR => "PTR", - QueryType::MX => "MX", - QueryType::TXT => "TXT", - QueryType::AAAA => "AAAA", - QueryType::LOC => "LOC", - QueryType::SRV => "SRV", - QueryType::NAPTR => "NAPTR", - QueryType::OPT => "OPT", - QueryType::DS => "DS", - QueryType::RRSIG => "RRSIG", - QueryType::NSEC => "NSEC", - QueryType::DNSKEY => "DNSKEY", - QueryType::NSEC3 => "NSEC3", - QueryType::SVCB => "SVCB", - QueryType::HTTPS => "HTTPS", - QueryType::UNKNOWN(_) => "UNKNOWN", - } - } - - pub fn parse_str(s: &str) -> Option { - match s.to_ascii_uppercase().as_str() { - "A" => Some(QueryType::A), - "NS" => Some(QueryType::NS), - "CNAME" => Some(QueryType::CNAME), - "SOA" => Some(QueryType::SOA), - "PTR" => Some(QueryType::PTR), - "MX" => Some(QueryType::MX), - "TXT" => Some(QueryType::TXT), - "AAAA" => Some(QueryType::AAAA), - "LOC" => Some(QueryType::LOC), - "SRV" => Some(QueryType::SRV), - "NAPTR" => Some(QueryType::NAPTR), - "DS" => Some(QueryType::DS), - "RRSIG" => Some(QueryType::RRSIG), - "DNSKEY" => Some(QueryType::DNSKEY), - "NSEC" => Some(QueryType::NSEC), - "NSEC3" => Some(QueryType::NSEC3), - "SVCB" => Some(QueryType::SVCB), - "HTTPS" => Some(QueryType::HTTPS), - _ => None, - } - } +define_qtypes! { + A = 1, "A", + NS = 2, "NS", + CNAME = 5, "CNAME", + SOA = 6, "SOA", + PTR = 12, "PTR", + MX = 15, "MX", + TXT = 16, "TXT", + AAAA = 28, "AAAA", + LOC = 29, "LOC", + SRV = 33, "SRV", + NAPTR = 35, "NAPTR", + OPT = 41, "OPT", + DS = 43, "DS", + RRSIG = 46, "RRSIG", + NSEC = 47, "NSEC", + DNSKEY = 48, "DNSKEY", + NSEC3 = 50, "NSEC3", + SVCB = 64, "SVCB", + HTTPS = 65, "HTTPS", } #[derive(Debug, Clone, PartialEq, Eq)] -- 2.34.1 From 241c40553b76bd7f5b5a7cdbcfc4005803803797 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 12:34:04 +0300 Subject: [PATCH 105/139] feat(odoh): ship ODoH client + self-hosted relay (RFC 9230) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Client (mode = "odoh"): URL-query target routing per RFC 9230 §5, /.well-known/odohconfigs TTL cache with 60s backoff on failure, HPKE seal/open via odoh-rs, strict-mode default that SERVFAILs on relay failure instead of silently downgrading. Host-equality config validation rejects same-operator relay/target pairs. Relay (`numa relay [PORT]`): axum server with /relay + /health. SSRF-hardened hostname validator (RFC 1035 ASCII + dot + dash), 4 KiB body cap at the axum layer, 5s full-transaction timeout, and static 502 on target failure (reqwest internals logged, not leaked). Aggregate counters only — no per-request logs. Observability: new `UpstreamTransport { Udp, Doh, Dot, Odoh }` orthogonal to `QueryPath`, so /stats can tally wire protocols symmetrically. Recursive mode records `Some(Udp)` for honest "bytes egressing in cleartext" accounting. Tests: Suite 8 exercises the client end-to-end via Frank Denis's public relay + Cloudflare target; Suite 9 exercises `numa relay` forwarding + guards against Cloudflare as the real far end. Full probe script at tests/probe-odoh-ecosystem.sh verifies the entire public ODoH ecosystem (4 targets + 1 relay per DNSCrypt's curated list — confirms deploying Numa's relay doubles global supply). --- Cargo.lock | 374 +++++++++++++++++++++++++- Cargo.toml | 4 + src/api.rs | 15 ++ src/config.rs | 177 +++++++++++- src/ctx.rs | 14 +- src/forward.rs | 119 +++++++-- src/lib.rs | 2 + src/main.rs | 17 ++ src/odoh.rs | 489 ++++++++++++++++++++++++++++++++++ src/relay.rs | 347 ++++++++++++++++++++++++ src/serve.rs | 39 +-- src/stats.rs | 62 ++++- tests/integration.sh | 197 ++++++++++++++ tests/probe-odoh-ecosystem.sh | 101 +++++++ 14 files changed, 1911 insertions(+), 46 deletions(-) create mode 100644 src/odoh.rs create mode 100644 src/relay.rs create mode 100755 tests/probe-odoh-ecosystem.sh diff --git a/Cargo.lock b/Cargo.lock index cf25b3a..2bfeaa6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,41 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common", + "generic-array", +] + +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "aes-gcm" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831010a0f742e1209b3bcea8fab6a8e149051ba6099432c8cb2cc117dec3ead1" +dependencies = [ + "aead", + "aes", + "cipher", + "ctr", + "ghash", + "subtle", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -109,7 +144,7 @@ dependencies = [ "nom", "num-traits", "rusticata-macros", - "thiserror", + "thiserror 2.0.18", "time", ] @@ -257,6 +292,15 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bumpalo" version = "3.20.2" @@ -299,6 +343,30 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chacha20" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3613f74bd2eac03dad61bd53dbe620703d4371614fe0bc3b9f04dd36fe4e818" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "chacha20poly1305" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10cd79432192d1c0f4e1a0fef9527696cc039165d729fb41b3f4f4f354c2dc35" +dependencies = [ + "aead", + "chacha20", + "cipher", + "poly1305", + "zeroize", +] + [[package]] name = "ciborium" version = "0.2.2" @@ -326,6 +394,17 @@ dependencies = [ "half", ] +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", + "zeroize", +] + [[package]] name = "clap" version = "4.6.0" @@ -383,6 +462,15 @@ version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.5.0" @@ -473,6 +561,51 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "rand_core 0.6.4", + "typenum", +] + +[[package]] +name = "ctr" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" +dependencies = [ + "cipher", +] + +[[package]] +name = "curve25519-dalek" +version = "4.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be" +dependencies = [ + "cfg-if", + "cpufeatures", + "curve25519-dalek-derive", + "fiat-crypto", + "rustc_version", + "subtle", +] + +[[package]] +name = "curve25519-dalek-derive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "data-encoding" version = "2.10.0" @@ -502,6 +635,17 @@ dependencies = [ "powerfmt", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -576,6 +720,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "fiat-crypto" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -707,6 +857,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -747,6 +907,16 @@ dependencies = [ "wasip3", ] +[[package]] +name = "ghash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0d8a4362ccb29cb0b265253fb0a2728f592895ee6854fd9bc13f2ffda266ff1" +dependencies = [ + "opaque-debug", + "polyval", +] + [[package]] name = "h2" version = "0.4.13" @@ -820,7 +990,7 @@ dependencies = [ "rand", "ring", "rustls", - "thiserror", + "thiserror 2.0.18", "tinyvec", "tokio", "tokio-rustls", @@ -846,13 +1016,51 @@ dependencies = [ "resolv-conf", "rustls", "smallvec", - "thiserror", + "thiserror 2.0.18", "tokio", "tokio-rustls", "tracing", "webpki-roots 0.26.11", ] +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "hpke" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f65d16b699dd1a1fa2d851c970b0c971b388eeeb40f744252b8de48860980c8f" +dependencies = [ + "aead", + "aes-gcm", + "chacha20poly1305", + "digest", + "generic-array", + "hkdf", + "hmac", + "rand_core 0.9.5", + "sha2", + "subtle", + "x25519-dalek", + "zeroize", +] + [[package]] name = "http" version = "1.4.0" @@ -1081,6 +1289,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "generic-array", +] + [[package]] name = "ipconfig" version = "0.3.4" @@ -1344,7 +1561,9 @@ dependencies = [ "hyper", "hyper-util", "log", + "odoh-rs", "qrcode", + "rand_core 0.9.5", "rcgen", "reqwest", "ring", @@ -1363,6 +1582,19 @@ dependencies = [ "x509-parser", ] +[[package]] +name = "odoh-rs" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbb89720b7dfdddc89bc7560669d41a0bb68eb64784a4aebd293308a489f3837" +dependencies = [ + "aes-gcm", + "bytes", + "hkdf", + "hpke", + "thiserror 1.0.69", +] + [[package]] name = "oid-registry" version = "0.8.1" @@ -1394,6 +1626,12 @@ version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + [[package]] name = "page_size" version = "0.6.0" @@ -1483,6 +1721,29 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "poly1305" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8159bd90725d2df49889a078b54f4f79e87f1f8a8444194cdca81d38f5393abf" +dependencies = [ + "cpufeatures", + "opaque-debug", + "universal-hash", +] + +[[package]] +name = "polyval" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" +dependencies = [ + "cfg-if", + "cpufeatures", + "opaque-debug", + "universal-hash", +] + [[package]] name = "portable-atomic" version = "1.13.1" @@ -1561,7 +1822,7 @@ dependencies = [ "rustc-hash", "rustls", "socket2", - "thiserror", + "thiserror 2.0.18", "tokio", "tracing", "web-time", @@ -1582,7 +1843,7 @@ dependencies = [ "rustls", "rustls-pki-types", "slab", - "thiserror", + "thiserror 2.0.18", "tinyvec", "tracing", "web-time", @@ -1630,7 +1891,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha", - "rand_core", + "rand_core 0.9.5", ] [[package]] @@ -1640,7 +1901,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", ] [[package]] @@ -1789,6 +2059,15 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rusticata-macros" version = "4.1.0" @@ -1953,6 +2232,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "shlex" version = "1.3.0" @@ -2046,13 +2336,33 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + [[package]] name = "thiserror" version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl", + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -2298,6 +2608,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "typenum" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" + [[package]] name = "unicode-ident" version = "1.0.24" @@ -2310,6 +2626,16 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common", + "subtle", +] + [[package]] name = "untrusted" version = "0.9.0" @@ -2351,6 +2677,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "walkdir" version = "2.5.0" @@ -2860,6 +3192,16 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "x25519-dalek" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7e468321c81fb07fa7f4c636c3972b9100f0346e5b6a9f2bd0603a52f7ed277" +dependencies = [ + "curve25519-dalek", + "rand_core 0.6.4", +] + [[package]] name = "x509-parser" version = "0.18.1" @@ -2874,7 +3216,7 @@ dependencies = [ "oid-registry", "ring", "rusticata-macros", - "thiserror", + "thiserror 2.0.18", "time", ] @@ -2956,6 +3298,20 @@ name = "zeroize" version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" +dependencies = [ + "zeroize_derive", +] + +[[package]] +name = "zeroize_derive" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "zerotrie" diff --git a/Cargo.toml b/Cargo.toml index 3b3234f..15601c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,10 @@ rustls = "0.23" tokio-rustls = "0.26" arc-swap = "1" ring = "0.17" +odoh-rs = "1" +# rand_core 0.9 matches the version odoh-rs (via hpke 0.13) depends on, so we +# share one RngCore trait and OsRng impl across the dep tree. +rand_core = { version = "0.9", features = ["os_rng"] } rustls-pemfile = "2.2.0" qrcode = { version = "0.14", default-features = false, features = ["svg"] } webpki-roots = "1" diff --git a/src/api.rs b/src/api.rs index dd1fe78..7f02920 100644 --- a/src/api.rs +++ b/src/api.rs @@ -170,6 +170,7 @@ struct StatsResponse { srtt: bool, queries: QueriesStats, transport: TransportStats, + upstream_transport: UpstreamTransportStats, cache: CacheStats, overrides: OverrideStats, blocking: BlockingStatsResponse, @@ -186,6 +187,14 @@ struct TransportStats { doh: u64, } +#[derive(Serialize)] +struct UpstreamTransportStats { + udp: u64, + doh: u64, + dot: u64, + odoh: u64, +} + #[derive(Serialize)] struct MobileStatsResponse { enabled: bool, @@ -566,6 +575,12 @@ async fn stats(State(ctx): State>) -> Json { dot: snap.transport_dot, doh: snap.transport_doh, }, + upstream_transport: UpstreamTransportStats { + udp: snap.upstream_transport_udp, + doh: snap.upstream_transport_doh, + dot: snap.upstream_transport_dot, + odoh: snap.upstream_transport_odoh, + }, cache: CacheStats { entries: cache_len, max_entries: cache_max, diff --git a/src/config.rs b/src/config.rs index 309344b..2d2f1ba 100644 --- a/src/config.rs +++ b/src/config.rs @@ -134,6 +134,7 @@ pub enum UpstreamMode { #[default] Forward, Recursive, + Odoh, } impl UpstreamMode { @@ -142,6 +143,7 @@ impl UpstreamMode { UpstreamMode::Auto => "auto", UpstreamMode::Forward => "forward", UpstreamMode::Recursive => "recursive", + UpstreamMode::Odoh => "odoh", } } } @@ -154,7 +156,7 @@ pub struct UpstreamConfig { pub address: Vec, #[serde(default = "default_upstream_port")] pub port: u16, - #[serde(default)] + #[serde(default, deserialize_with = "string_or_vec")] pub fallback: Vec, #[serde(default = "default_timeout_ms")] pub timeout_ms: u64, @@ -166,6 +168,20 @@ pub struct UpstreamConfig { pub prime_tlds: Vec, #[serde(default = "default_srtt")] pub srtt: bool, + + /// Only used when `mode = "odoh"`. Full https:// URL of the relay + /// endpoint (including path, e.g. `https://odoh-relay.numa.rs/relay`). + #[serde(default)] + pub relay: Option, + /// Only used when `mode = "odoh"`. Full https:// URL of the target + /// resolver (`https://odoh.cloudflare-dns.com/dns-query`). + #[serde(default)] + pub target: Option, + /// Only used when `mode = "odoh"`. When true (the default), relay failure + /// returns SERVFAIL instead of downgrading to the `fallback` upstream — + /// a user who configured ODoH rarely wants a silent non-oblivious path. + #[serde(default)] + pub strict: Option, } impl Default for UpstreamConfig { @@ -180,10 +196,75 @@ impl Default for UpstreamConfig { root_hints: default_root_hints(), prime_tlds: default_prime_tlds(), srtt: default_srtt(), + relay: None, + target: None, + strict: None, } } } +/// Parsed ODoH config fields. `mode = "odoh"` requires both URLs to be +/// present, to parse as `https://`, and to resolve to distinct hosts. +#[derive(Debug)] +pub struct OdohUpstream { + pub relay_url: String, + pub target_host: String, + pub target_path: String, + pub strict: bool, +} + +impl UpstreamConfig { + /// Validate and extract ODoH-specific fields. Called during `load_config` + /// so misconfigured ODoH fails fast at startup, the same care we take + /// with the DNSSEC strict boot check. + pub fn odoh_upstream(&self) -> Result { + let relay = self + .relay + .as_deref() + .ok_or("mode = \"odoh\" requires upstream.relay")?; + let target = self + .target + .as_deref() + .ok_or("mode = \"odoh\" requires upstream.target")?; + + let relay_url = reqwest::Url::parse(relay) + .map_err(|e| format!("upstream.relay invalid URL '{}': {}", relay, e))?; + let target_url = reqwest::Url::parse(target) + .map_err(|e| format!("upstream.target invalid URL '{}': {}", target, e))?; + + if relay_url.scheme() != "https" || target_url.scheme() != "https" { + return Err("upstream.relay and upstream.target must both use https://".into()); + } + if relay_url.host_str().is_none() || target_url.host_str().is_none() { + return Err("upstream.relay and upstream.target must include a host".into()); + } + if relay_url.host_str() == target_url.host_str() { + return Err(format!( + "upstream.relay and upstream.target resolve to the same host ({}); the privacy property requires distinct operators", + relay_url.host_str().unwrap_or("?") + ) + .into()); + } + + let target_host = target_url + .host_str() + .ok_or("upstream.target has no host")? + .to_string(); + let target_path = if target_url.path().is_empty() { + "/".to_string() + } else { + target_url.path().to_string() + }; + + Ok(OdohUpstream { + relay_url: relay.to_string(), + target_host, + target_path, + strict: self.strict.unwrap_or(true), + }) + } +} + fn string_or_vec<'de, D>(deserializer: D) -> std::result::Result, D::Error> where D: serde::Deserializer<'de>, @@ -643,12 +724,22 @@ mod tests { } #[test] - fn fallback_parses() { + fn fallback_array_parses() { let config: Config = toml::from_str("[upstream]\nfallback = [\"8.8.8.8\", \"1.1.1.1\"]").unwrap(); assert_eq!(config.upstream.fallback, vec!["8.8.8.8", "1.1.1.1"]); } + #[test] + fn fallback_string_parses_as_singleton_vec() { + let config: Config = + toml::from_str("[upstream]\nfallback = \"tls://1.1.1.1#cloudflare-dns.com\"").unwrap(); + assert_eq!( + config.upstream.fallback, + vec!["tls://1.1.1.1#cloudflare-dns.com"] + ); + } + #[test] fn empty_address_gives_empty_vec() { let config: Config = toml::from_str("").unwrap(); @@ -656,6 +747,88 @@ mod tests { assert!(config.upstream.fallback.is_empty()); } + // ── [upstream] mode = "odoh" ──────────────────────────────────────── + + #[test] + fn odoh_config_parses_and_validates() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +"#; + let config: Config = toml::from_str(toml).unwrap(); + assert!(matches!(config.upstream.mode, UpstreamMode::Odoh)); + let odoh = config.upstream.odoh_upstream().unwrap(); + assert_eq!(odoh.relay_url, "https://odoh-relay.numa.rs/relay"); + assert_eq!(odoh.target_host, "odoh.cloudflare-dns.com"); + assert_eq!(odoh.target_path, "/dns-query"); + assert!(odoh.strict, "strict defaults to true under mode=odoh"); + } + + #[test] + fn odoh_strict_false_is_honoured() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +strict = false +"#; + let config: Config = toml::from_str(toml).unwrap(); + assert!(!config.upstream.odoh_upstream().unwrap().strict); + } + + #[test] + fn odoh_rejects_same_host_relay_and_target() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh.example.com/relay" +target = "https://odoh.example.com/dns-query" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let err = config.upstream.odoh_upstream().unwrap_err().to_string(); + assert!(err.contains("same host"), "got: {err}"); + } + + #[test] + fn odoh_rejects_non_https() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "http://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let err = config.upstream.odoh_upstream().unwrap_err().to_string(); + assert!(err.contains("https"), "got: {err}"); + } + + #[test] + fn odoh_missing_relay_rejected() { + let toml = r#" +[upstream] +mode = "odoh" +target = "https://odoh.cloudflare-dns.com/dns-query" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let err = config.upstream.odoh_upstream().unwrap_err().to_string(); + assert!(err.contains("upstream.relay"), "got: {err}"); + } + + #[test] + fn odoh_missing_target_rejected() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let err = config.upstream.odoh_upstream().unwrap_err().to_string(); + assert!(err.contains("upstream.target"), "got: {err}"); + } + // ── issue #82: [[forwarding]] config section ──────────────────────── #[test] diff --git a/src/ctx.rs b/src/ctx.rs index 71e81c9..511b678 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -105,6 +105,7 @@ pub async fn resolve_query( // Pipeline: overrides -> .localhost -> local zones -> special-use (unless forwarded) // -> .tld proxy -> blocklist -> cache -> forwarding -> recursive/upstream // Each lock is scoped to avoid holding MutexGuard across await points. + let mut upstream_transport: Option = None; let (response, path, dnssec) = { let override_record = ctx.overrides.read().unwrap().lookup(&qname); if let Some(record) = override_record { @@ -208,6 +209,7 @@ pub async fn resolve_query( { // Conditional forwarding takes priority over recursive mode // (e.g. Tailscale .ts.net, VPC private zones) + upstream_transport = pool.preferred().map(|u| u.transport()); match forward_with_failover_raw( raw_wire, pool, @@ -241,6 +243,9 @@ pub async fn resolve_query( } } } else if ctx.upstream_mode == UpstreamMode::Recursive { + // Recursive resolution makes UDP hops to roots/TLDs/auths; + // tag as Udp so the dashboard can aggregate plaintext-wire + // egress honestly. Only mark on success — errors stay None. let key = (qname.clone(), qtype); let (resp, path, err) = resolve_coalesced(&ctx.inflight, key, &query, || { crate::recursive::resolve_recursive( @@ -263,6 +268,8 @@ pub async fn resolve_query( qname, err.as_deref().unwrap_or("leader failed") ); + } else { + upstream_transport = Some(crate::stats::UpstreamTransport::Udp); } (resp, path, DnssecStatus::Indeterminate) } else { @@ -277,7 +284,10 @@ pub async fn resolve_query( .await { Ok(resp_wire) => match cache_and_parse(ctx, &qname, qtype, &resp_wire) { - Ok(resp) => (resp, QueryPath::Upstream, DnssecStatus::Indeterminate), + Ok(resp) => { + upstream_transport = pool.preferred().map(|u| u.transport()); + (resp, QueryPath::Upstream, DnssecStatus::Indeterminate) + } Err(e) => { error!("{} | {:?} {} | PARSE ERROR | {}", src_addr, qtype, qname, e); ( @@ -397,7 +407,7 @@ pub async fn resolve_query( // Record stats and query log { let mut s = ctx.stats.lock().unwrap(); - let total = s.record(path, transport); + let total = s.record(path, transport, upstream_transport); if total.is_multiple_of(1000) { s.log_summary(); } diff --git a/src/forward.rs b/src/forward.rs index 9bfa426..bb91fcf 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -1,14 +1,16 @@ use std::fmt; use std::net::{IpAddr, SocketAddr}; -use std::sync::RwLock; +use std::sync::{Arc, RwLock}; use std::time::{Duration, Instant}; use tokio::net::UdpSocket; use tokio::time::timeout; use crate::buffer::BytePacketBuffer; +use crate::odoh::{query_through_relay, OdohConfigCache}; use crate::packet::DnsPacket; use crate::srtt::SrttCache; +use crate::stats::UpstreamTransport; use crate::Result; #[derive(Clone)] @@ -23,16 +25,34 @@ pub enum Upstream { tls_name: Option, connector: tokio_rustls::TlsConnector, }, + /// Oblivious DNS-over-HTTPS (RFC 9230). Queries are HPKE-sealed to the + /// target and forwarded through an independent relay. Target host lives + /// on `target_config` (single source of truth — the cache keys on it). + Odoh { + relay_url: String, + target_path: String, + client: reqwest::Client, + target_config: Arc, + }, } impl Upstream { /// IP address to key SRTT tracking on, if the upstream has a stable one. - /// `Doh` routes through a URL + connection pool, so there's no single IP - /// to track; SRTT is skipped for it. + /// `Doh` and `Odoh` route through a URL + connection pool, so there's no + /// single IP to track; SRTT is skipped for them. pub fn tracked_ip(&self) -> Option { match self { Upstream::Udp(addr) | Upstream::Dot { addr, .. } => Some(addr.ip()), - Upstream::Doh { .. } => None, + Upstream::Doh { .. } | Upstream::Odoh { .. } => None, + } + } + + pub fn transport(&self) -> UpstreamTransport { + match self { + Upstream::Udp(_) => UpstreamTransport::Udp, + Upstream::Doh { .. } => UpstreamTransport::Doh, + Upstream::Dot { .. } => UpstreamTransport::Dot, + Upstream::Odoh { .. } => UpstreamTransport::Odoh, } } } @@ -43,6 +63,20 @@ impl PartialEq for Upstream { (Self::Udp(a), Self::Udp(b)) => a == b, (Self::Doh { url: a, .. }, Self::Doh { url: b, .. }) => a == b, (Self::Dot { addr: a, .. }, Self::Dot { addr: b, .. }) => a == b, + ( + Self::Odoh { + relay_url: ra, + target_path: pa, + target_config: ca, + .. + }, + Self::Odoh { + relay_url: rb, + target_path: pb, + target_config: cb, + .. + }, + ) => ra == rb && pa == pb && ca.target_host() == cb.target_host(), _ => false, } } @@ -63,6 +97,18 @@ impl fmt::Display for Upstream { Some(name) => write!(f, "tls://{}#{}", addr, name), None => write!(f, "tls://{}", addr), }, + Upstream::Odoh { + relay_url, + target_path, + target_config, + .. + } => write!( + f, + "odoh://{}{} via {}", + target_config.target_host(), + target_path, + relay_url + ), } } } @@ -82,22 +128,20 @@ pub(crate) fn parse_upstream_addr( Err(format!("invalid upstream address: {}", s)) } +/// Parse a slice of upstream address strings into `Upstream` values, failing +/// on the first invalid entry. +pub fn parse_upstream_list(addrs: &[String], default_port: u16) -> Result> { + addrs + .iter() + .map(|s| parse_upstream(s, default_port)) + .collect() +} + pub fn parse_upstream(s: &str, default_port: u16) -> Result { if s.starts_with("https://") { - let client = reqwest::Client::builder() - .use_rustls_tls() - .http2_initial_stream_window_size(65_535) - .http2_initial_connection_window_size(65_535) - .http2_keep_alive_interval(Duration::from_secs(15)) - .http2_keep_alive_while_idle(true) - .http2_keep_alive_timeout(Duration::from_secs(10)) - .pool_idle_timeout(Duration::from_secs(300)) - .pool_max_idle_per_host(1) - .build() - .unwrap_or_default(); return Ok(Upstream::Doh { url: s.to_string(), - client, + client: build_https_client(), }); } // tls://IP:PORT#hostname or tls://IP#hostname (default port 853) @@ -118,6 +162,33 @@ pub fn parse_upstream(s: &str, default_port: u16) -> Result { Ok(Upstream::Udp(addr)) } +/// HTTP/2 client tuned for DoH/ODoH: small windows for low latency, long-lived +/// keep-alive. Shared by the DoH upstream and the ODoH config-fetcher + +/// seal/open path. Pool defaults to one idle conn per host — good for +/// resolvers that talk to a single upstream; relays that fan out to many +/// targets should use [`build_https_client_with_pool`]. +pub fn build_https_client() -> reqwest::Client { + build_https_client_with_pool(1) +} + +/// Same shape as [`build_https_client`], but caller picks +/// `pool_max_idle_per_host`. Relay workloads hit many distinct target hosts +/// and benefit from a larger pool so warm connections survive concurrent +/// fan-out. +pub fn build_https_client_with_pool(pool_max_idle_per_host: usize) -> reqwest::Client { + reqwest::Client::builder() + .use_rustls_tls() + .http2_initial_stream_window_size(65_535) + .http2_initial_connection_window_size(65_535) + .http2_keep_alive_interval(Duration::from_secs(15)) + .http2_keep_alive_while_idle(true) + .http2_keep_alive_timeout(Duration::from_secs(10)) + .pool_idle_timeout(Duration::from_secs(300)) + .pool_max_idle_per_host(pool_max_idle_per_host) + .build() + .unwrap_or_default() +} + fn build_dot_connector() -> Result { let _ = rustls::crypto::ring::default_provider().install_default(); let mut root_store = rustls::RootCertStore::empty(); @@ -282,6 +353,22 @@ pub async fn forward_query_raw( tls_name, connector, } => forward_dot_raw(wire, *addr, tls_name, connector, timeout_duration).await, + Upstream::Odoh { + relay_url, + target_path, + client, + target_config, + } => { + query_through_relay( + wire, + relay_url, + target_path, + client, + target_config, + timeout_duration, + ) + .await + } } } diff --git a/src/lib.rs b/src/lib.rs index bce8833..aec568d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,7 @@ pub mod health; pub mod lan; pub mod mobile_api; pub mod mobileconfig; +pub mod odoh; pub mod override_store; pub mod packet; pub mod proxy; @@ -20,6 +21,7 @@ pub mod query_log; pub mod question; pub mod record; pub mod recursive; +pub mod relay; pub mod serve; pub mod service_store; pub mod setup_phone; diff --git a/src/main.rs b/src/main.rs index 34bf747..e077a2f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -60,6 +60,22 @@ fn main() -> numa::Result<()> { .block_on(numa::setup_phone::run()) .map_err(|e| e.into()); } + "relay" => { + let port: u16 = std::env::args() + .nth(2) + .as_deref() + .and_then(|s| s.parse().ok()) + .unwrap_or(8443); + let addr: std::net::SocketAddr = ([127, 0, 0, 1], port).into(); + eprintln!( + "\x1b[1;38;2;192;98;58mNuma\x1b[0m — ODoH relay on {}\n", + addr + ); + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()?; + return runtime.block_on(numa::relay::run(addr)); + } "lan" => { let sub = std::env::args().nth(2).unwrap_or_default(); let config_path = std::env::args() @@ -91,6 +107,7 @@ fn main() -> numa::Result<()> { eprintln!(" service status Check if the service is running"); eprintln!(" lan on Enable LAN service discovery (mDNS)"); eprintln!(" lan off Disable LAN service discovery"); + eprintln!(" relay [PORT] Run as an ODoH relay (RFC 9230, default port 8443)"); eprintln!(" setup-phone Generate a QR code to install Numa DoT on a phone"); eprintln!(" help Show this help"); eprintln!(); diff --git a/src/odoh.rs b/src/odoh.rs new file mode 100644 index 0000000..2cfa9c5 --- /dev/null +++ b/src/odoh.rs @@ -0,0 +1,489 @@ +//! ODoH target-config fetcher and TTL cache (RFC 9230 §6). +//! +//! ## Ciphersuite policy +//! `odoh-rs` deserialization rejects any config whose KEM/KDF/AEAD triple is +//! not the mandatory `(X25519, HKDF-SHA256, AES-128-GCM)` (see +//! `ObliviousDoHConfigContents::deserialize`). This is stricter than the +//! plan's "pick the mandatory suite if mixed": a response containing *any* +//! non-mandatory config fails parse entirely. Real-world targets publish a +//! single mandatory config, so this is fine in practice; revisit if a target +//! that matters starts mixing suites. + +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use arc_swap::ArcSwapOption; +use odoh_rs::{ + ObliviousDoHConfigContents, ObliviousDoHConfigs, ObliviousDoHMessage, + ObliviousDoHMessagePlaintext, +}; +use rand_core::{OsRng, TryRngCore}; +use reqwest::header::HeaderMap; +use tokio::sync::Mutex; +use tokio::time::timeout; + +use crate::Result; + +/// MIME type used for both directions of the ODoH exchange (RFC 9230 §4). +const ODOH_CONTENT_TYPE: &str = "application/oblivious-dns-message"; + +/// Cap on the response body we read into memory when the relay returns +/// non-success. Protects against a hostile relay streaming a huge body on +/// the error path; keeps enough room to carry a human-readable reason. +const ERROR_BODY_PREVIEW_BYTES: usize = 1024; + +/// Fallback TTL when the target's response lacks a usable `Cache-Control` +/// directive. RFC 9230 §6.2 places no hard floor; 24 h matches what Cloudflare +/// publishes in practice. +const DEFAULT_CONFIG_TTL: Duration = Duration::from_secs(24 * 60 * 60); + +/// Cap on any TTL we'll honour, regardless of what the target advertises. +/// Keeps a misconfigured server from pinning an old key indefinitely. +const MAX_CONFIG_TTL: Duration = Duration::from_secs(7 * 24 * 60 * 60); + +/// After a failed `/.well-known/odohconfigs` fetch, refuse to refetch again +/// within this window — a target that is genuinely broken would otherwise +/// receive one request per query. Queries that arrive during the backoff +/// return the cached error immediately. +const REFRESH_BACKOFF: Duration = Duration::from_secs(60); + +/// Parsed ODoH target config plus the freshness metadata needed to age it out. +#[derive(Debug)] +pub struct OdohTargetConfig { + pub contents: ObliviousDoHConfigContents, + pub key_id: Vec, + expires_at: Instant, +} + +impl OdohTargetConfig { + pub fn is_expired(&self) -> bool { + Instant::now() >= self.expires_at + } +} + +struct FailedRefresh { + at: Instant, + err: String, +} + +/// TTL-gated cache of a single target's HPKE config. +/// +/// Reads go through `ArcSwapOption` (lock-free hot path). Refreshes serialize +/// on an async mutex so a burst of simultaneous misses produces a single +/// outbound fetch, and a failed refresh blocks subsequent refetches for +/// [`REFRESH_BACKOFF`] to prevent hot-looping against a broken target. +pub struct OdohConfigCache { + target_host: String, + configs_url: String, + client: reqwest::Client, + current: ArcSwapOption, + last_failure: ArcSwapOption, + refresh_lock: Mutex<()>, +} + +impl OdohConfigCache { + pub fn new(target_host: String, client: reqwest::Client) -> Self { + let configs_url = format!("https://{}/.well-known/odohconfigs", target_host); + Self { + target_host, + configs_url, + client, + current: ArcSwapOption::from(None), + last_failure: ArcSwapOption::from(None), + refresh_lock: Mutex::new(()), + } + } + + pub fn target_host(&self) -> &str { + &self.target_host + } + + /// Return a valid config, refetching when the cache is cold or expired. + /// Within [`REFRESH_BACKOFF`] of a failed refresh, returns the cached + /// error without issuing another fetch. + pub async fn get(&self) -> Result> { + if let Some(cfg) = self.current.load_full() { + if !cfg.is_expired() { + return Ok(cfg); + } + } + + if let Some(err) = self.backoff_error() { + return Err(err); + } + + let _guard = self.refresh_lock.lock().await; + + // Another task may have refreshed or failed while we waited. + if let Some(cfg) = self.current.load_full() { + if !cfg.is_expired() { + return Ok(cfg); + } + } + if let Some(err) = self.backoff_error() { + return Err(err); + } + + match fetch_odoh_config(&self.client, &self.configs_url).await { + Ok(fresh) => { + let fresh = Arc::new(fresh); + self.current.store(Some(fresh.clone())); + self.last_failure.store(None); + Ok(fresh) + } + Err(e) => { + let msg = format!("ODoH config fetch failed: {e}"); + self.last_failure.store(Some(Arc::new(FailedRefresh { + at: Instant::now(), + err: msg.clone(), + }))); + Err(msg.into()) + } + } + } + + /// Drop the cached config. Called after the target rejects ciphertext + /// (key rotation race) so the next `get()` refetches. + pub fn invalidate(&self) { + self.current.store(None); + } + + fn backoff_error(&self) -> Option { + let fail = self.last_failure.load_full()?; + if fail.at.elapsed() < REFRESH_BACKOFF { + Some(format!("{} (backoff active)", fail.err).into()) + } else { + None + } + } +} + +/// Fetch `/.well-known/odohconfigs` from `configs_url` and parse it into an +/// [`OdohTargetConfig`]. The TTL is taken from the response's +/// `Cache-Control: max-age=`, clamped to [`DEFAULT_CONFIG_TTL`, +/// [`MAX_CONFIG_TTL`]] when absent or obviously wrong. +pub async fn fetch_odoh_config( + client: &reqwest::Client, + configs_url: &str, +) -> Result { + let resp = client.get(configs_url).send().await?.error_for_status()?; + let ttl = cache_control_ttl(resp.headers()).unwrap_or(DEFAULT_CONFIG_TTL); + let body = resp.bytes().await?; + parse_odoh_config(&body, ttl) +} + +fn parse_odoh_config(body: &[u8], ttl: Duration) -> Result { + let mut buf = body; + let configs: ObliviousDoHConfigs = odoh_rs::parse(&mut buf) + .map_err(|e| format!("failed to parse ObliviousDoHConfigs: {e}"))?; + let first = configs + .into_iter() + .next() + .ok_or("target published no ODoH configs with a supported version + ciphersuite")?; + let contents: ObliviousDoHConfigContents = first.into(); + let key_id = contents + .identifier() + .map_err(|e| format!("failed to derive key_id from ODoH config: {e}"))?; + Ok(OdohTargetConfig { + contents, + key_id, + expires_at: Instant::now() + ttl.min(MAX_CONFIG_TTL), + }) +} + +/// Send a DNS wire query through an ODoH relay to a target and return the +/// plaintext DNS wire response. +/// +/// Flow: fetch the target's HPKE config (cached), seal the query, POST to the +/// relay with `Targethost`/`Targetpath` headers, then unseal the response. +/// On seal/unseal failure we invalidate the cache and retry once — this +/// handles the benign race where the target rotated its key between our +/// cached config and the POST. +pub async fn query_through_relay( + wire: &[u8], + relay_url: &str, + target_path: &str, + client: &reqwest::Client, + cache: &OdohConfigCache, + timeout_duration: Duration, +) -> Result> { + let req = OdohRequest { + wire, + relay_url, + target_path, + client, + cache, + timeout: timeout_duration, + }; + match attempt_query(&req).await { + Ok(v) => Ok(v), + Err(AttemptError::KeyRotation(_)) => { + cache.invalidate(); + attempt_query(&req).await.map_err(AttemptError::into_error) + } + Err(e) => Err(e.into_error()), + } +} + +struct OdohRequest<'a> { + wire: &'a [u8], + relay_url: &'a str, + target_path: &'a str, + client: &'a reqwest::Client, + cache: &'a OdohConfigCache, + timeout: Duration, +} + +/// Classification used only by the retry path in [`query_through_relay`]. +enum AttemptError { + /// Target signalled the config we used is stale (key rotation race). + /// Callers should invalidate the cache and retry exactly once. + KeyRotation(String), + /// Any other failure — transport, timeout, malformed response. + Other(crate::Error), +} + +impl AttemptError { + fn into_error(self) -> crate::Error { + match self { + AttemptError::KeyRotation(m) => format!("ODoH key rotation race: {m}").into(), + AttemptError::Other(e) => e, + } + } +} + +async fn attempt_query(req: &OdohRequest<'_>) -> std::result::Result, AttemptError> { + let cfg = req.cache.get().await.map_err(AttemptError::Other)?; + + let plaintext = ObliviousDoHMessagePlaintext::new(req.wire, 0); + // rand_core 0.9's OsRng is fallible-only; wrap for the infallible bound. + let mut os = OsRng; + let mut rng = os.unwrap_mut(); + let (encrypted_query, client_secret) = + odoh_rs::encrypt_query(&plaintext, &cfg.contents, &mut rng) + .map_err(|e| AttemptError::Other(format!("ODoH encrypt failed: {e}").into()))?; + let body = odoh_rs::compose(&encrypted_query) + .map_err(|e| AttemptError::Other(format!("ODoH compose failed: {e}").into()))? + .freeze(); + + // RFC 9230 §5 and the reference client use URL query parameters, not + // HTTP headers, to carry the target routing. `Targethost`/`Targetpath` + // headers cause relays to treat the request as an unspecified-target and + // reject it. + let (status, resp_body) = timeout(req.timeout, async { + let resp = req + .client + .post(req.relay_url) + .header(reqwest::header::CONTENT_TYPE, ODOH_CONTENT_TYPE) + .header(reqwest::header::ACCEPT, ODOH_CONTENT_TYPE) + .header(reqwest::header::CACHE_CONTROL, "no-cache, no-store") + .query(&[ + ("targethost", req.cache.target_host()), + ("targetpath", req.target_path), + ]) + .body(body) + .send() + .await?; + let status = resp.status(); + let body = resp.bytes().await?; + Ok::<_, reqwest::Error>((status, body)) + }) + .await + .map_err(|_| AttemptError::Other("ODoH relay request timed out".into()))? + .map_err(|e| AttemptError::Other(format!("ODoH relay request failed: {e}").into()))?; + + // RFC 9230 §4.3 expects a target that can't decrypt to reply with a DNS + // error in a sealed 200 response; a 401 from the relay/target is the + // practical signal that our cached HPKE key is stale. Treat 400 as a + // client-side bug (malformed ODoH envelope) — retrying would loop-fail. + if !status.is_success() { + let preview_len = resp_body.len().min(ERROR_BODY_PREVIEW_BYTES); + let body_preview = String::from_utf8_lossy(&resp_body[..preview_len]); + let msg = format!("ODoH relay returned {status}: {}", body_preview.trim()); + return Err(if status.as_u16() == 401 { + AttemptError::KeyRotation(msg) + } else { + AttemptError::Other(msg.into()) + }); + } + + let mut buf = resp_body; + let encrypted_response: ObliviousDoHMessage = odoh_rs::parse(&mut buf) + .map_err(|e| AttemptError::Other(format!("ODoH response parse failed: {e}").into()))?; + let plaintext_response = + odoh_rs::decrypt_response(&plaintext, &encrypted_response, client_secret) + .map_err(|e| AttemptError::KeyRotation(format!("ODoH decrypt failed: {e}")))?; + + Ok(plaintext_response.into_msg().to_vec()) +} + +fn cache_control_ttl(headers: &HeaderMap) -> Option { + let cc = headers.get(reqwest::header::CACHE_CONTROL)?.to_str().ok()?; + for directive in cc.split(',') { + let directive = directive.trim(); + if let Some(rest) = directive.strip_prefix("max-age=") { + if let Ok(secs) = rest.trim().parse::() { + if secs > 0 { + return Some(Duration::from_secs(secs)); + } + } + } + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + use odoh_rs::{ObliviousDoHConfig, ObliviousDoHKeyPair}; + + // RFC 9180 HPKE IDs for the sole ODoH mandatory suite: + // KEM = X25519, KDF = HKDF-SHA256, AEAD = AES-128-GCM. + const KEM_X25519: u16 = 0x0020; + const KDF_SHA256: u16 = 0x0001; + const AEAD_AES128GCM: u16 = 0x0001; + + fn synth_configs_bytes() -> Vec { + let kp = ObliviousDoHKeyPair::from_parameters( + KEM_X25519, + KDF_SHA256, + AEAD_AES128GCM, + &[0u8; 32], + ); + let pk = kp.public().clone(); + let configs: ObliviousDoHConfigs = vec![ObliviousDoHConfig::from(pk)].into(); + odoh_rs::compose(&configs).unwrap().to_vec() + } + + #[test] + fn parse_accepts_well_formed_config() { + let bytes = synth_configs_bytes(); + let cfg = parse_odoh_config(&bytes, Duration::from_secs(3600)).unwrap(); + assert!(!cfg.key_id.is_empty()); + assert!(!cfg.is_expired()); + } + + #[test] + fn parse_rejects_garbage() { + let bytes = [0xffu8; 16]; + assert!(parse_odoh_config(&bytes, Duration::from_secs(3600)).is_err()); + } + + #[test] + fn parse_rejects_empty() { + assert!(parse_odoh_config(&[], Duration::from_secs(3600)).is_err()); + } + + #[test] + fn ttl_capped_at_max() { + let bytes = synth_configs_bytes(); + let cfg = parse_odoh_config(&bytes, Duration::from_secs(100 * 24 * 60 * 60)).unwrap(); + let remaining = cfg.expires_at.saturating_duration_since(Instant::now()); + assert!(remaining <= MAX_CONFIG_TTL); + assert!(remaining >= MAX_CONFIG_TTL - Duration::from_secs(1)); + } + + #[test] + fn cache_control_parses_max_age() { + let mut h = HeaderMap::new(); + h.insert("cache-control", "public, max-age=86400".parse().unwrap()); + assert_eq!(cache_control_ttl(&h), Some(Duration::from_secs(86400))); + } + + #[test] + fn cache_control_ignores_max_age_zero() { + let mut h = HeaderMap::new(); + h.insert("cache-control", "max-age=0, no-store".parse().unwrap()); + assert_eq!(cache_control_ttl(&h), None); + } + + #[test] + fn cache_control_missing_falls_back() { + let h = HeaderMap::new(); + assert_eq!(cache_control_ttl(&h), None); + } + + #[test] + fn is_expired_tracks_ttl() { + let bytes = synth_configs_bytes(); + let mut cfg = parse_odoh_config(&bytes, Duration::from_secs(3600)).unwrap(); + assert!(!cfg.is_expired()); + cfg.expires_at = Instant::now() - Duration::from_secs(1); + assert!(cfg.is_expired()); + } + + #[tokio::test] + async fn cache_backoff_blocks_refetch_after_failure() { + // Point the cache at a host that does not exist so the fetch fails + // deterministically; this exercises the backoff wiring without a + // network round-trip succeeding. + let cache = OdohConfigCache::new( + "odoh-target.invalid".to_string(), + reqwest::Client::builder() + .timeout(Duration::from_millis(200)) + .build() + .unwrap(), + ); + + let first = cache.get().await; + assert!(first.is_err(), "first fetch must fail against invalid host"); + + // Within the backoff window, the cached error is returned immediately. + let second = cache.get().await.unwrap_err().to_string(); + assert!( + second.contains("backoff active"), + "expected backoff hint, got: {second}" + ); + + // Reaching past the backoff window allows a fresh attempt — simulate + // by rewinding the recorded failure timestamp. + cache.last_failure.store(Some(Arc::new(FailedRefresh { + at: Instant::now() - (REFRESH_BACKOFF + Duration::from_secs(1)), + err: "prior".to_string(), + }))); + let third = cache.get().await.unwrap_err().to_string(); + assert!( + !third.contains("backoff active"), + "expected fresh fetch attempt, got: {third}" + ); + } + + /// Round-trip the HPKE seal/unseal path in isolation from HTTP, using the + /// odoh-rs primitives that `query_through_relay` wires together. Guards + /// against silently breaking the crypto glue if we refactor that path. + #[test] + fn seal_unseal_round_trip() { + use odoh_rs::{decrypt_query, encrypt_response, ResponseNonce}; + + let kp = ObliviousDoHKeyPair::from_parameters( + KEM_X25519, + KDF_SHA256, + AEAD_AES128GCM, + &[0u8; 32], + ); + + let query_wire = b"\x12\x34\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x07example\x03com\x00\x00\x01\x00\x01"; + let query_pt = ObliviousDoHMessagePlaintext::new(query_wire, 0); + let mut os = OsRng; + let mut rng = os.unwrap_mut(); + let (query_enc, client_secret) = + odoh_rs::encrypt_query(&query_pt, kp.public(), &mut rng).unwrap(); + + let (query_back, server_secret) = decrypt_query(&query_enc, &kp).unwrap(); + assert_eq!(query_back.into_msg().as_ref(), query_wire); + + let response_wire = b"\x12\x34\x81\x80\x00\x01\x00\x01\x00\x00\x00\x00"; + let response_pt = ObliviousDoHMessagePlaintext::new(response_wire, 0); + let response_enc = encrypt_response( + &query_pt, + &response_pt, + server_secret, + ResponseNonce::default(), + ) + .unwrap(); + + let response_back = + odoh_rs::decrypt_response(&query_pt, &response_enc, client_secret).unwrap(); + assert_eq!(response_back.into_msg().as_ref(), response_wire); + } +} diff --git a/src/relay.rs b/src/relay.rs new file mode 100644 index 0000000..8d6ab40 --- /dev/null +++ b/src/relay.rs @@ -0,0 +1,347 @@ +//! ODoH relay (RFC 9230 §5) — the forward-without-reading half of the +//! protocol. Runs `numa relay`; skips all resolver initialisation (no port +//! 53, no cache, no recursion, no dashboard). The relay never reads the +//! HPKE-sealed payload and keeps no per-request logs — only aggregate +//! counters. + +use std::net::SocketAddr; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; +use std::time::Duration; + +use axum::body::Bytes; +use axum::extract::{DefaultBodyLimit, Query, State}; +use axum::http::{header, StatusCode}; +use axum::response::{IntoResponse, Response}; +use axum::routing::{get, post}; +use axum::Router; +use log::{error, info}; +use serde::Deserialize; +use tokio::net::TcpListener; + +use crate::forward::build_https_client_with_pool; +use crate::Result; + +const ODOH_CONTENT_TYPE: &str = "application/oblivious-dns-message"; + +/// Cap on the opaque body we accept from a client. ODoH envelopes are +/// ~100–300 bytes in practice; anything larger is malformed or hostile. +const MAX_BODY_BYTES: usize = 4 * 1024; + +/// Cap on the body we read back from the target before streaming to client. +/// Slightly larger: target responses carry DNS answers plus HPKE overhead. +const MAX_TARGET_RESPONSE_BYTES: usize = 8 * 1024; + +/// Covers the whole client-to-target round trip — not just `.send()` — so a +/// slow-drip target can't hang a worker indefinitely after headers arrive. +const TARGET_REQUEST_TIMEOUT: Duration = Duration::from_secs(5); + +/// The relay hits many distinct target hosts on behalf of clients. A +/// per-host idle pool of 4 keeps warm TLS connections available for concurrent +/// fan-out without blowing up memory on a small VPS. +const RELAY_POOL_PER_HOST: usize = 4; + +#[derive(Deserialize)] +struct RelayParams { + targethost: String, + targetpath: String, +} + +struct RelayState { + client: reqwest::Client, + total_requests: AtomicU64, + forwarded_ok: AtomicU64, + forwarded_err: AtomicU64, + rejected_bad_request: AtomicU64, +} + +pub async fn run(addr: SocketAddr) -> Result<()> { + let state = Arc::new(RelayState { + client: build_https_client_with_pool(RELAY_POOL_PER_HOST), + total_requests: AtomicU64::new(0), + forwarded_ok: AtomicU64::new(0), + forwarded_err: AtomicU64::new(0), + rejected_bad_request: AtomicU64::new(0), + }); + + let app = Router::new() + .route("/relay", post(handle_relay)) + // Overrides axum's default (2 MiB) so hostile clients can't force + // the relay to buffer multi-MB bodies before our own cap check. + .layer(DefaultBodyLimit::max(MAX_BODY_BYTES)) + .route("/health", get(handle_health)) + .with_state(state); + + let listener = TcpListener::bind(addr).await?; + info!("ODoH relay listening on {}", addr); + axum::serve(listener, app).await?; + Ok(()) +} + +async fn handle_health(State(state): State>) -> impl IntoResponse { + let body = format!( + "ok\ntotal {}\nforwarded_ok {}\nforwarded_err {}\nrejected_bad_request {}\n", + state.total_requests.load(Ordering::Relaxed), + state.forwarded_ok.load(Ordering::Relaxed), + state.forwarded_err.load(Ordering::Relaxed), + state.rejected_bad_request.load(Ordering::Relaxed), + ); + ( + StatusCode::OK, + [(header::CONTENT_TYPE, "text/plain; charset=utf-8")], + body, + ) +} + +async fn handle_relay( + State(state): State>, + Query(params): Query, + headers: axum::http::HeaderMap, + body: Bytes, +) -> Response { + state.total_requests.fetch_add(1, Ordering::Relaxed); + + if !content_type_matches(&headers, ODOH_CONTENT_TYPE) { + state.rejected_bad_request.fetch_add(1, Ordering::Relaxed); + return ( + StatusCode::UNSUPPORTED_MEDIA_TYPE, + "expected application/oblivious-dns-message", + ) + .into_response(); + } + + if body.len() > MAX_BODY_BYTES { + state.rejected_bad_request.fetch_add(1, Ordering::Relaxed); + return (StatusCode::PAYLOAD_TOO_LARGE, "body exceeds 4 KiB cap").into_response(); + } + + if !is_valid_hostname(¶ms.targethost) || !params.targetpath.starts_with('/') { + state.rejected_bad_request.fetch_add(1, Ordering::Relaxed); + return (StatusCode::BAD_REQUEST, "invalid targethost or targetpath").into_response(); + } + + let target_url = format!("https://{}{}", params.targethost, params.targetpath); + match forward_to_target(&state.client, &target_url, body).await { + Ok((status, resp_body)) => { + state.forwarded_ok.fetch_add(1, Ordering::Relaxed); + ( + status, + [(header::CONTENT_TYPE, ODOH_CONTENT_TYPE)], + resp_body, + ) + .into_response() + } + Err(e) => { + // Log the underlying reason for operators; don't leak reqwest + // internals (which can reveal the target's TLS config, IP, etc.) + // back to arbitrary clients. + error!("relay forward to {} failed: {}", target_url, e); + state.forwarded_err.fetch_add(1, Ordering::Relaxed); + (StatusCode::BAD_GATEWAY, "target unreachable").into_response() + } + } +} + +async fn forward_to_target( + client: &reqwest::Client, + url: &str, + body: Bytes, +) -> Result<(StatusCode, Bytes)> { + let response = tokio::time::timeout(TARGET_REQUEST_TIMEOUT, async { + let resp = client + .post(url) + .header(header::CONTENT_TYPE, ODOH_CONTENT_TYPE) + .header(header::ACCEPT, ODOH_CONTENT_TYPE) + .body(body) + .send() + .await?; + let status = StatusCode::from_u16(resp.status().as_u16())?; + let resp_body = resp.bytes().await?; + Ok::<_, crate::Error>((status, resp_body)) + }) + .await + .map_err(|_| "timed out talking to target")??; + + if response.1.len() > MAX_TARGET_RESPONSE_BYTES { + return Err("target response exceeds cap".into()); + } + Ok(response) +} + +fn content_type_matches(headers: &axum::http::HeaderMap, expected: &str) -> bool { + headers + .get(header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .map(|ct| ct.split(';').next().unwrap_or("").trim() == expected) + .unwrap_or(false) +} + +/// Strict DNS-hostname validator, aimed at closing the SSRF surface a naive +/// `contains('.')` check leaves open (e.g. `example.com@internal.host`, +/// `evil.com/../admin`). Requires ASCII letters/digits/dot/dash, at least +/// one dot, no leading dot or dash, length ≤ 253 per RFC 1035. +fn is_valid_hostname(h: &str) -> bool { + if h.is_empty() || h.len() > 253 || !h.contains('.') { + return false; + } + if h.starts_with('.') || h.starts_with('-') || h.ends_with('.') || h.ends_with('-') { + return false; + } + h.chars() + .all(|c| c.is_ascii_alphanumeric() || c == '.' || c == '-') +} + +#[cfg(test)] +mod tests { + use super::*; + + async fn spawn_relay() -> (SocketAddr, Arc) { + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + + let state = Arc::new(RelayState { + client: build_https_client_with_pool(RELAY_POOL_PER_HOST), + total_requests: AtomicU64::new(0), + forwarded_ok: AtomicU64::new(0), + forwarded_err: AtomicU64::new(0), + rejected_bad_request: AtomicU64::new(0), + }); + + let app = Router::new() + .route("/relay", post(handle_relay)) + .layer(DefaultBodyLimit::max(MAX_BODY_BYTES)) + .route("/health", get(handle_health)) + .with_state(state.clone()); + + tokio::spawn(async move { + let _ = axum::serve(listener, app).await; + }); + (addr, state) + } + + #[tokio::test] + async fn rejects_missing_content_type() { + let (addr, state) = spawn_relay().await; + let client = reqwest::Client::new(); + let resp = client + .post(format!( + "http://{}/relay?targethost=odoh.example.com&targetpath=/dns-query", + addr + )) + .body("body") + .send() + .await + .unwrap(); + assert_eq!(resp.status(), reqwest::StatusCode::UNSUPPORTED_MEDIA_TYPE); + assert_eq!(state.rejected_bad_request.load(Ordering::Relaxed), 1); + } + + #[tokio::test] + async fn rejects_oversized_body() { + let (addr, _state) = spawn_relay().await; + let big = vec![0u8; MAX_BODY_BYTES + 1]; + let client = reqwest::Client::new(); + let resp = client + .post(format!( + "http://{}/relay?targethost=odoh.example.com&targetpath=/dns-query", + addr + )) + .header(header::CONTENT_TYPE, ODOH_CONTENT_TYPE) + .body(big) + .send() + .await + .unwrap(); + // axum's DefaultBodyLimit rejects before our handler runs, so the + // counter doesn't increment — but the status code proves the layer + // enforced the cap. Either status is acceptable evidence. + assert!(matches!( + resp.status(), + reqwest::StatusCode::PAYLOAD_TOO_LARGE | reqwest::StatusCode::BAD_REQUEST + )); + } + + #[tokio::test] + async fn rejects_targethost_without_dot() { + let (addr, state) = spawn_relay().await; + let client = reqwest::Client::new(); + let resp = client + .post(format!( + "http://{}/relay?targethost=localhost&targetpath=/dns-query", + addr + )) + .header(header::CONTENT_TYPE, ODOH_CONTENT_TYPE) + .body("body") + .send() + .await + .unwrap(); + assert_eq!(resp.status(), reqwest::StatusCode::BAD_REQUEST); + assert_eq!(state.rejected_bad_request.load(Ordering::Relaxed), 1); + } + + #[tokio::test] + async fn rejects_userinfo_ssrf_attempt() { + let (addr, state) = spawn_relay().await; + let client = reqwest::Client::new(); + // The naive contains('.') check would let this through and reqwest + // would route to `internal.host` using `evil.com` as userinfo. + let resp = client + .post(format!( + "http://{}/relay?targethost=evil.com@internal.host&targetpath=/dns-query", + addr + )) + .header(header::CONTENT_TYPE, ODOH_CONTENT_TYPE) + .body("body") + .send() + .await + .unwrap(); + assert_eq!(resp.status(), reqwest::StatusCode::BAD_REQUEST); + assert_eq!(state.rejected_bad_request.load(Ordering::Relaxed), 1); + } + + #[tokio::test] + async fn rejects_targetpath_without_leading_slash() { + let (addr, state) = spawn_relay().await; + let client = reqwest::Client::new(); + let resp = client + .post(format!( + "http://{}/relay?targethost=odoh.example.com&targetpath=dns-query", + addr + )) + .header(header::CONTENT_TYPE, ODOH_CONTENT_TYPE) + .body("body") + .send() + .await + .unwrap(); + assert_eq!(resp.status(), reqwest::StatusCode::BAD_REQUEST); + assert_eq!(state.rejected_bad_request.load(Ordering::Relaxed), 1); + } + + #[tokio::test] + async fn health_endpoint_reports_counters() { + let (addr, _state) = spawn_relay().await; + let client = reqwest::Client::new(); + let resp = client + .get(format!("http://{}/health", addr)) + .send() + .await + .unwrap(); + assert_eq!(resp.status(), reqwest::StatusCode::OK); + let body = resp.text().await.unwrap(); + assert!(body.contains("ok\n")); + assert!(body.contains("forwarded_ok 0")); + } + + #[test] + fn hostname_validator_accepts_and_rejects() { + assert!(is_valid_hostname("odoh.cloudflare-dns.com")); + assert!(is_valid_hostname("a.b")); + assert!(!is_valid_hostname("")); + assert!(!is_valid_hostname("localhost")); + assert!(!is_valid_hostname(".leading.dot")); + assert!(!is_valid_hostname("trailing.dot.")); + assert!(!is_valid_hostname("-leading.dash")); + assert!(!is_valid_hostname("evil.com@internal.host")); + assert!(!is_valid_hostname("evil.com/../admin")); + assert!(!is_valid_hostname(&"a".repeat(254))); + } +} diff --git a/src/serve.rs b/src/serve.rs index 8e85b32..2037857 100644 --- a/src/serve.rs +++ b/src/serve.rs @@ -17,7 +17,8 @@ use crate::buffer::BytePacketBuffer; use crate::cache::DnsCache; use crate::config::{build_zone_map, load_config, ConfigLoad}; use crate::ctx::{handle_query, ServerCtx}; -use crate::forward::{parse_upstream, Upstream, UpstreamPool}; +use crate::forward::{build_https_client, parse_upstream_list, Upstream, UpstreamPool}; +use crate::odoh::OdohConfigCache; use crate::override_store::OverrideStore; use crate::query_log::QueryLog; use crate::service_store::ServiceStore; @@ -54,10 +55,7 @@ pub async fn run(config_path: String) -> crate::Result<()> { (crate::config::UpstreamMode::Recursive, false, pool, label) } else { log::warn!("recursive probe failed — falling back to Quad9 DoH"); - let client = reqwest::Client::builder() - .use_rustls_tls() - .build() - .unwrap_or_default(); + let client = build_https_client(); let url = DOH_FALLBACK.to_string(); let label = url.clone(); let pool = UpstreamPool::new(vec![Upstream::Doh { url, client }], vec![]); @@ -82,16 +80,8 @@ pub async fn run(config_path: String) -> crate::Result<()> { config.upstream.address.clone() }; - let primary: Vec = addrs - .iter() - .map(|s| parse_upstream(s, config.upstream.port)) - .collect::>>()?; - let fallback: Vec = config - .upstream - .fallback - .iter() - .map(|s| parse_upstream(s, config.upstream.port)) - .collect::>>()?; + let primary = parse_upstream_list(&addrs, config.upstream.port)?; + let fallback = parse_upstream_list(&config.upstream.fallback, config.upstream.port)?; let pool = UpstreamPool::new(primary, fallback); let label = pool.label(); @@ -102,6 +92,25 @@ pub async fn run(config_path: String) -> crate::Result<()> { label, ) } + crate::config::UpstreamMode::Odoh => { + let odoh = config.upstream.odoh_upstream()?; + let client = build_https_client(); + let target_config = Arc::new(OdohConfigCache::new(odoh.target_host, client.clone())); + let primary = vec![Upstream::Odoh { + relay_url: odoh.relay_url, + target_path: odoh.target_path, + client, + target_config, + }]; + let fallback = if odoh.strict { + Vec::new() + } else { + parse_upstream_list(&config.upstream.fallback, config.upstream.port)? + }; + let pool = UpstreamPool::new(primary, fallback); + let label = pool.label(); + (crate::config::UpstreamMode::Odoh, false, pool, label) + } }; let api_port = config.server.api_port; diff --git a/src/stats.rs b/src/stats.rs index df9127c..acedec1 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -102,6 +102,10 @@ pub struct ServerStats { transport_tcp: u64, transport_dot: u64, transport_doh: u64, + upstream_transport_udp: u64, + upstream_transport_doh: u64, + upstream_transport_dot: u64, + upstream_transport_odoh: u64, started_at: Instant, } @@ -124,6 +128,31 @@ impl Transport { } } +/// Wire protocol used for a forwarded upstream call. Orthogonal to +/// `QueryPath`: the path answers "where the answer came from"; this answers +/// "over what wire we spoke to the forwarder." Callers pass +/// `Option` — `None` for resolutions that never touched +/// a forwarder (cache/local/blocked) or for recursive mode, which has its +/// own counter via `QueryPath::Recursive`. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum UpstreamTransport { + Udp, + Doh, + Dot, + Odoh, +} + +impl UpstreamTransport { + pub fn as_str(&self) -> &'static str { + match self { + UpstreamTransport::Udp => "UDP", + UpstreamTransport::Doh => "DOH", + UpstreamTransport::Dot => "DOT", + UpstreamTransport::Odoh => "ODOH", + } + } +} + #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum QueryPath { Local, @@ -202,11 +231,20 @@ impl ServerStats { transport_tcp: 0, transport_dot: 0, transport_doh: 0, + upstream_transport_udp: 0, + upstream_transport_doh: 0, + upstream_transport_dot: 0, + upstream_transport_odoh: 0, started_at: Instant::now(), } } - pub fn record(&mut self, path: QueryPath, transport: Transport) -> u64 { + pub fn record( + &mut self, + path: QueryPath, + transport: Transport, + upstream_transport: Option, + ) -> u64 { self.queries_total += 1; match path { QueryPath::Local => self.queries_local += 1, @@ -225,6 +263,14 @@ impl ServerStats { Transport::Dot => self.transport_dot += 1, Transport::Doh => self.transport_doh += 1, } + if let Some(ut) = upstream_transport { + match ut { + UpstreamTransport::Udp => self.upstream_transport_udp += 1, + UpstreamTransport::Doh => self.upstream_transport_doh += 1, + UpstreamTransport::Dot => self.upstream_transport_dot += 1, + UpstreamTransport::Odoh => self.upstream_transport_odoh += 1, + } + } self.queries_total } @@ -253,6 +299,10 @@ impl ServerStats { transport_tcp: self.transport_tcp, transport_dot: self.transport_dot, transport_doh: self.transport_doh, + upstream_transport_udp: self.upstream_transport_udp, + upstream_transport_doh: self.upstream_transport_doh, + upstream_transport_dot: self.upstream_transport_dot, + upstream_transport_odoh: self.upstream_transport_odoh, } } @@ -263,7 +313,7 @@ impl ServerStats { let secs = uptime.as_secs() % 60; log::info!( - "STATS | uptime {}h{}m{}s | total {} | fwd {} | upstream {} | recursive {} | coalesced {} | cached {} | local {} | override {} | blocked {} | errors {}", + "STATS | uptime {}h{}m{}s | total {} | fwd {} | upstream {} | recursive {} | coalesced {} | cached {} | local {} | override {} | blocked {} | errors {} | up-udp {} | up-doh {} | up-dot {} | up-odoh {}", hours, mins, secs, self.queries_total, self.queries_forwarded, @@ -275,6 +325,10 @@ impl ServerStats { self.queries_overridden, self.queries_blocked, self.upstream_errors, + self.upstream_transport_udp, + self.upstream_transport_doh, + self.upstream_transport_dot, + self.upstream_transport_odoh, ); } } @@ -295,4 +349,8 @@ pub struct StatsSnapshot { pub transport_tcp: u64, pub transport_dot: u64, pub transport_doh: u64, + pub upstream_transport_udp: u64, + pub upstream_transport_doh: u64, + pub upstream_transport_dot: u64, + pub upstream_transport_odoh: u64, } diff --git a/tests/integration.sh b/tests/integration.sh index 81bd28d..77b874f 100755 --- a/tests/integration.sh +++ b/tests/integration.sh @@ -854,6 +854,203 @@ sleep 1 fi # end Suite 7 +# ---- Suite 8: ODoH (Oblivious DoH via public relay + target) ---- +# Exercises the full client pipeline: /.well-known/odohconfigs fetch, +# HPKE seal/unseal, URL-query target routing (RFC 9230 §5), dashboard +# QueryPath::Odoh counter. Depends on the public ecosystem being up — +# the probe-odoh-ecosystem.sh script guards against flaky runs. +if should_run_suite 8; then +echo "" +echo "╔══════════════════════════════════════════╗" +echo "║ Suite 8: ODoH (Anonymous DNS) ║" +echo "╚══════════════════════════════════════════╝" + +run_test_suite "ODoH via edgecompute.app relay → Cloudflare target" " +[server] +bind_addr = \"127.0.0.1:5354\" +api_port = 5381 + +[upstream] +mode = \"odoh\" +relay = \"https://odoh-relay.edgecompute.app/proxy\" +target = \"https://odoh.cloudflare-dns.com/dns-query\" + +[cache] +max_entries = 10000 +min_ttl = 60 +max_ttl = 86400 + +[blocking] +enabled = false + +[proxy] +enabled = false +" + +# Re-start briefly to assert ODoH-specific observability: the odoh counter +# has to tick above zero after a query, and the stats label has to reflect +# the oblivious path. These guard against silent regressions in the +# QueryPath::Odoh tagging and the /stats serialisation. +RUST_LOG=info "$BINARY" "$CONFIG" > "$LOG" 2>&1 & +NUMA_PID=$! +for _ in $(seq 1 30); do + curl -sf "http://127.0.0.1:$API_PORT/health" >/dev/null 2>&1 && break + sleep 0.1 +done + +$DIG example.com A +short > /dev/null 2>&1 || true +sleep 1 + +STATS=$(curl -sf http://127.0.0.1:$API_PORT/stats 2>/dev/null) +# upstream_transport.odoh lives inside the upstream_transport object. +ODOH_COUNT=$(echo "$STATS" | grep -o '"upstream_transport":{[^}]*}' \ + | grep -o '"odoh":[0-9]*' | cut -d: -f2) +check "upstream_transport.odoh > 0 after a query" "[1-9]" "${ODOH_COUNT:-0}" + +check "Upstream label advertises odoh://" \ + "odoh://" \ + "$(echo "$STATS" | grep -o '"upstream":"[^"]*"')" + +check "Stats mode field is 'odoh'" \ + '"mode":"odoh"' \ + "$(echo "$STATS" | grep -o '"mode":"odoh"')" + +# Strict-mode failure path: a clearly-unreachable relay must produce +# SERVFAIL without silent downgrade. We hijack the config to point at +# an .invalid host so we don't rely on external uptime. +kill "$NUMA_PID" 2>/dev/null || true +wait "$NUMA_PID" 2>/dev/null || true +sleep 1 + +cat > "$CONFIG" << 'CONF' +[server] +bind_addr = "127.0.0.1:5354" +api_port = 5381 + +[upstream] +mode = "odoh" +relay = "https://relay.invalid/proxy" +target = "https://odoh.cloudflare-dns.com/dns-query" +strict = true + +[cache] +max_entries = 10000 + +[blocking] +enabled = false + +[proxy] +enabled = false +CONF + +RUST_LOG=info "$BINARY" "$CONFIG" > "$LOG" 2>&1 & +NUMA_PID=$! +for _ in $(seq 1 30); do + curl -sf "http://127.0.0.1:$API_PORT/health" >/dev/null 2>&1 && break + sleep 0.1 +done + +check "Strict-mode relay outage returns SERVFAIL" \ + "SERVFAIL" \ + "$($DIG example.com A 2>&1 | grep 'status:')" + +kill "$NUMA_PID" 2>/dev/null || true +wait "$NUMA_PID" 2>/dev/null || true +sleep 1 + +# Negative: relay and target on the same host must be rejected at startup. +cat > "$CONFIG" << 'CONF' +[server] +bind_addr = "127.0.0.1:5354" +api_port = 5381 + +[upstream] +mode = "odoh" +relay = "https://odoh.cloudflare-dns.com/proxy" +target = "https://odoh.cloudflare-dns.com/dns-query" +CONF + +STARTUP_OUT=$("$BINARY" "$CONFIG" 2>&1 || true) +check "Same-host relay+target rejected at startup" \ + "same host" \ + "$STARTUP_OUT" + +fi # end Suite 8 + +# ---- Suite 9: Numa's own ODoH relay (--relay-mode) ---- +# Exercises `numa relay PORT` as a forwarding proxy to a real ODoH target. +# Validates the RFC 9230 §5 relay behaviour: URL-query routing, content-type +# gating, body-size cap, and /health observability. +if should_run_suite 9; then +echo "" +echo "╔══════════════════════════════════════════╗" +echo "║ Suite 9: Numa ODoH Relay (own) ║" +echo "╚══════════════════════════════════════════╝" + +RELAY_PORT=18443 +"$BINARY" relay $RELAY_PORT > "$LOG" 2>&1 & +NUMA_PID=$! +for _ in $(seq 1 30); do + curl -sf "http://127.0.0.1:$RELAY_PORT/health" >/dev/null 2>&1 && break + sleep 0.1 +done + +echo "" +echo "=== Relay Endpoints ===" + +check "Health endpoint returns ok" \ + "ok" \ + "$(curl -sf http://127.0.0.1:$RELAY_PORT/health | head -1)" + +# Happy path: forwards arbitrary body to Cloudflare's ODoH target. The +# target will reject the garbage envelope with HTTP 400 — which is exactly +# what proves our relay faithfully forwarded (otherwise we'd see our own +# 4xx from the relay itself). +HAPPY_STATUS=$(curl -sS -o /dev/null -w "%{http_code}" -X POST \ + -H "Content-Type: application/oblivious-dns-message" \ + --data-binary "garbage-forwarded-end-to-end" \ + "http://127.0.0.1:$RELAY_PORT/relay?targethost=odoh.cloudflare-dns.com&targetpath=/dns-query") +check "Relay forwards to target (target rejects garbage → 400)" \ + "400" \ + "$HAPPY_STATUS" + +echo "" +echo "=== Guards ===" + +check "Missing content-type → 415" \ + "415" \ + "$(curl -sS -o /dev/null -w '%{http_code}' -X POST --data-binary 'x' \ + 'http://127.0.0.1:'$RELAY_PORT'/relay?targethost=odoh.cloudflare-dns.com&targetpath=/dns-query')" + +check "Oversized body (>4 KiB) → 413" \ + "413" \ + "$(head -c 5000 /dev/urandom | curl -sS -o /dev/null -w '%{http_code}' -X POST \ + -H 'Content-Type: application/oblivious-dns-message' --data-binary @- \ + 'http://127.0.0.1:'$RELAY_PORT'/relay?targethost=odoh.cloudflare-dns.com&targetpath=/dns-query')" + +check "Invalid targethost (no dot) → 400" \ + "400" \ + "$(curl -sS -o /dev/null -w '%{http_code}' -X POST \ + -H 'Content-Type: application/oblivious-dns-message' --data-binary 'x' \ + 'http://127.0.0.1:'$RELAY_PORT'/relay?targethost=invalid&targetpath=/dns-query')" + +echo "" +echo "=== Counters ===" + +HEALTH=$(curl -sf "http://127.0.0.1:$RELAY_PORT/health") +check "Relay counted at least one forwarded_ok" \ + "[1-9]" \ + "$(echo "$HEALTH" | grep 'forwarded_ok' | awk '{print $2}')" +check "Relay counted at least one rejected_bad_request" \ + "[1-9]" \ + "$(echo "$HEALTH" | grep 'rejected_bad_request' | awk '{print $2}')" + +kill "$NUMA_PID" 2>/dev/null || true +wait "$NUMA_PID" 2>/dev/null || true +sleep 1 + +fi # end Suite 9 + # Summary echo "" TOTAL=$((PASSED + FAILED)) diff --git a/tests/probe-odoh-ecosystem.sh b/tests/probe-odoh-ecosystem.sh new file mode 100755 index 0000000..b2ff311 --- /dev/null +++ b/tests/probe-odoh-ecosystem.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# Probe the public ODoH ecosystem. +# +# Source of truth: DNSCrypt's curated list at +# https://github.com/DNSCrypt/dnscrypt-resolvers/tree/master/v3 +# - v3/odoh-servers.md (ODoH targets) +# - v3/odoh-relays.md (ODoH relays) +# +# As of commit 2025-09-16 ("odohrelay-crypto-sx seems to be the only ODoH +# relay left"), the full public ecosystem is 4 targets + 1 relay. Re-run this +# script against the upstream list before making any "only N public relays" +# claim publicly. +# +# Usage: ./tests/probe-odoh-ecosystem.sh + +set -uo pipefail + +GREEN="\033[32m" +RED="\033[31m" +YELLOW="\033[33m" +DIM="\033[90m" +RESET="\033[0m" + +UP=0 +DOWN=0 + +probe_target() { + local name="$1" + local host="$2" + local url="https://${host}/.well-known/odohconfigs" + local start=$(date +%s%N) + local headers + headers=$(curl -sS -o /tmp/odoh-probe-body -D - --max-time 5 -A "numa-odoh-probe/0.1" "$url" 2>&1) || { + DOWN=$((DOWN + 1)) + printf " ${RED}✗${RESET} %-25s ${DIM}unreachable${RESET}\n" "$name" + return + } + local elapsed_ms=$((($(date +%s%N) - start) / 1000000)) + local status + status=$(echo "$headers" | head -1 | awk '{print $2}') + local ctype + ctype=$(echo "$headers" | grep -i '^content-type:' | head -1 | tr -d '\r') + local size + size=$(stat -f%z /tmp/odoh-probe-body 2>/dev/null || stat -c%s /tmp/odoh-probe-body 2>/dev/null || echo 0) + + if [[ "$status" == "200" ]] && [[ "$size" -gt 0 ]]; then + UP=$((UP + 1)) + printf " ${GREEN}✓${RESET} %-25s ${DIM}%4dms %s bytes %s${RESET}\n" "$name" "$elapsed_ms" "$size" "$ctype" + else + DOWN=$((DOWN + 1)) + printf " ${RED}✗${RESET} %-25s ${DIM}status=%s size=%s${RESET}\n" "$name" "$status" "$size" + fi + rm -f /tmp/odoh-probe-body +} + +probe_relay() { + # Relays don't expose /.well-known/odohconfigs — we just verify TLS reachability + # and that the endpoint responds to a malformed POST with an HTTP error + # (indicating the relay path exists). A real ODoH validation requires HPKE. + local name="$1" + local url="$2" + local start=$(date +%s%N) + local status + status=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 5 -A "numa-odoh-probe/0.1" \ + -X POST -H "Content-Type: application/oblivious-dns-message" \ + --data-binary "" "$url" 2>&1) || { + DOWN=$((DOWN + 1)) + printf " ${RED}✗${RESET} %-25s ${DIM}unreachable${RESET}\n" "$name" + return + } + local elapsed_ms=$((($(date +%s%N) - start) / 1000000)) + # Any 2xx or 4xx means the endpoint is live (TLS works, HTTP responded). + # 5xx or 000 (curl failure) means broken. + if [[ "$status" =~ ^[24] ]]; then + UP=$((UP + 1)) + printf " ${GREEN}✓${RESET} %-25s ${DIM}%4dms status=%s (endpoint live)${RESET}\n" "$name" "$elapsed_ms" "$status" + else + DOWN=$((DOWN + 1)) + printf " ${RED}✗${RESET} %-25s ${DIM}status=%s${RESET}\n" "$name" "$status" + fi +} + +echo "ODoH targets:" +probe_target "Cloudflare" "odoh.cloudflare-dns.com" +probe_target "crypto.sx" "odoh.crypto.sx" +probe_target "Snowstorm" "dope.snowstorm.love" +probe_target "Tiarap" "doh.tiarap.org" + +echo +echo "ODoH relays:" +probe_relay "Frank Denis (Fastly)" "https://odoh-relay.edgecompute.app/proxy" + +echo +TOTAL=$((UP + DOWN)) +if [[ "$DOWN" -eq 0 ]]; then + printf "${GREEN}All %d endpoints up${RESET}\n" "$TOTAL" + exit 0 +else + printf "${YELLOW}%d/%d up, %d down${RESET}\n" "$UP" "$TOTAL" "$DOWN" + exit 1 +fi -- 2.34.1 From cf128c19af0cc2b747398ae4fd853e7150078edb Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 15:44:09 +0300 Subject: [PATCH 106/139] feat(odoh): bootstrap-IP overrides + zero hedge for ODoH (post-deploy fixes) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues surfaced from running mode = "odoh" against the live Hetzner relay as system DNS: 1. **Bootstrap deadlock.** The reqwest HTTPS client resolves the relay and target hostnames via system DNS. When numa is itself the system resolver, the ODoH client loops trying to resolve through itself. Adds optional `relay_ip` and `target_ip` to `[upstream]`, plumbed into reqwest's `resolve()` so the HTTPS client bypasses system DNS for those two hostnames. TLS still validates against the URL hostname, so a stale IP fails loudly rather than silently MITM'ing. 2. **2x relay load.** Default `hedge_ms = 10` triggers a duplicate in-flight query for every request. Useful for UDP/DoH/DoT (rescues tail latency cheaply); wasteful for ODoH (doubles HPKE seal/unseal, doubles sealed-byte footprint a passive observer can correlate, no latency win — relay hop dominates either way). Force-zero in oblivious mode regardless of configured hedge_ms. Validated end-to-end against odoh-relay.numa.rs → Cloudflare: 3 digs produced 3 forwarded_ok on the relay (was 6 before the hedge fix), upstream_transport.odoh ticks correctly. --- src/config.rs | 123 ++++++++++++++++++++++++++++++++++++++++++++++++- src/forward.rs | 21 ++++++++- src/serve.rs | 13 ++++-- 3 files changed, 149 insertions(+), 8 deletions(-) diff --git a/src/config.rs b/src/config.rs index 2d2f1ba..1205e37 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; -use std::net::Ipv4Addr; -use std::net::Ipv6Addr; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; +use std::time::Duration; use serde::Deserialize; @@ -146,6 +146,19 @@ impl UpstreamMode { UpstreamMode::Odoh => "odoh", } } + + /// Hedging duplicates the in-flight query against the same upstream to + /// rescue tail latency. Beneficial for UDP/DoH/DoT (cheap retransmit / + /// h2 stream multiplexing). For ODoH it doubles the relay's HPKE + /// seal/unseal load and the sealed-byte footprint a passive observer + /// can correlate, with no latency win — the relay hop dominates either + /// way. Force-zero in oblivious mode regardless of `hedge_ms`. + pub fn hedge_delay(self, hedge_ms: u64) -> Duration { + match self { + UpstreamMode::Odoh => Duration::ZERO, + _ => Duration::from_millis(hedge_ms), + } + } } #[derive(Deserialize)] @@ -182,6 +195,16 @@ pub struct UpstreamConfig { /// a user who configured ODoH rarely wants a silent non-oblivious path. #[serde(default)] pub strict: Option, + + /// Bootstrap IP for the relay host, used when numa is its own system + /// resolver (otherwise the ODoH HTTPS client loops resolving through + /// itself). TLS still validates the cert against `relay`'s hostname. + #[serde(default)] + pub relay_ip: Option, + + /// Same as `relay_ip` but for the target host. + #[serde(default)] + pub target_ip: Option, } impl Default for UpstreamConfig { @@ -199,6 +222,8 @@ impl Default for UpstreamConfig { relay: None, target: None, strict: None, + relay_ip: None, + target_ip: None, } } } @@ -208,9 +233,12 @@ impl Default for UpstreamConfig { #[derive(Debug)] pub struct OdohUpstream { pub relay_url: String, + pub relay_host: String, pub target_host: String, pub target_path: String, pub strict: bool, + pub relay_bootstrap: Option, + pub target_bootstrap: Option, } impl UpstreamConfig { @@ -246,6 +274,10 @@ impl UpstreamConfig { .into()); } + let relay_host = relay_url + .host_str() + .ok_or("upstream.relay has no host")? + .to_string(); let target_host = target_url .host_str() .ok_or("upstream.target has no host")? @@ -256,11 +288,17 @@ impl UpstreamConfig { target_url.path().to_string() }; + let relay_port = relay_url.port_or_known_default().unwrap_or(443); + let target_port = target_url.port_or_known_default().unwrap_or(443); + Ok(OdohUpstream { relay_url: relay.to_string(), + relay_host, target_host, target_path, strict: self.strict.unwrap_or(true), + relay_bootstrap: self.relay_ip.map(|ip| SocketAddr::new(ip, relay_port)), + target_bootstrap: self.target_ip.map(|ip| SocketAddr::new(ip, target_port)), }) } } @@ -817,6 +855,87 @@ target = "https://odoh.cloudflare-dns.com/dns-query" assert!(err.contains("upstream.relay"), "got: {err}"); } + #[test] + fn odoh_bootstrap_ips_parse_into_socket_addrs() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +relay_ip = "178.104.229.30" +target_ip = "104.16.249.249" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let odoh = config.upstream.odoh_upstream().unwrap(); + assert_eq!(odoh.relay_host, "odoh-relay.numa.rs"); + assert_eq!( + odoh.relay_bootstrap.unwrap().to_string(), + "178.104.229.30:443" + ); + assert_eq!( + odoh.target_bootstrap.unwrap().to_string(), + "104.16.249.249:443" + ); + } + + #[test] + fn odoh_bootstrap_ips_optional() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let odoh = config.upstream.odoh_upstream().unwrap(); + assert!(odoh.relay_bootstrap.is_none()); + assert!(odoh.target_bootstrap.is_none()); + } + + #[test] + fn odoh_bootstrap_ip_rejects_garbage() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +relay_ip = "not-an-ip" +"#; + let err = toml::from_str::(toml).err().unwrap().to_string(); + assert!(err.contains("relay_ip"), "got: {err}"); + } + + #[test] + fn odoh_bootstrap_uses_url_port_when_non_default() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs:8443/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +relay_ip = "178.104.229.30" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let odoh = config.upstream.odoh_upstream().unwrap(); + assert_eq!( + odoh.relay_bootstrap.unwrap().to_string(), + "178.104.229.30:8443" + ); + } + + #[test] + fn hedge_delay_zeroed_for_odoh_mode() { + assert_eq!( + UpstreamMode::Odoh.hedge_delay(50), + Duration::ZERO, + "ODoH mode must zero hedge regardless of configured hedge_ms" + ); + assert_eq!( + UpstreamMode::Forward.hedge_delay(50), + Duration::from_millis(50), + "non-ODoH modes honour configured hedge_ms" + ); + } + #[test] fn odoh_missing_target_rejected() { let toml = r#" diff --git a/src/forward.rs b/src/forward.rs index bb91fcf..530f1ed 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -176,6 +176,25 @@ pub fn build_https_client() -> reqwest::Client { /// and benefit from a larger pool so warm connections survive concurrent /// fan-out. pub fn build_https_client_with_pool(pool_max_idle_per_host: usize) -> reqwest::Client { + https_client_builder(pool_max_idle_per_host) + .build() + .unwrap_or_default() +} + +/// HTTPS client for the ODoH upstream, with bootstrap-IP overrides applied +/// so relay/target hostname resolution can bypass system DNS. +pub fn build_odoh_client(odoh: &crate::config::OdohUpstream) -> reqwest::Client { + let mut builder = https_client_builder(1); + if let Some(addr) = odoh.relay_bootstrap { + builder = builder.resolve(&odoh.relay_host, addr); + } + if let Some(addr) = odoh.target_bootstrap { + builder = builder.resolve(&odoh.target_host, addr); + } + builder.build().unwrap_or_default() +} + +fn https_client_builder(pool_max_idle_per_host: usize) -> reqwest::ClientBuilder { reqwest::Client::builder() .use_rustls_tls() .http2_initial_stream_window_size(65_535) @@ -185,8 +204,6 @@ pub fn build_https_client_with_pool(pool_max_idle_per_host: usize) -> reqwest::C .http2_keep_alive_timeout(Duration::from_secs(10)) .pool_idle_timeout(Duration::from_secs(300)) .pool_max_idle_per_host(pool_max_idle_per_host) - .build() - .unwrap_or_default() } fn build_dot_connector() -> Result { diff --git a/src/serve.rs b/src/serve.rs index 2037857..9b4b587 100644 --- a/src/serve.rs +++ b/src/serve.rs @@ -17,7 +17,9 @@ use crate::buffer::BytePacketBuffer; use crate::cache::DnsCache; use crate::config::{build_zone_map, load_config, ConfigLoad}; use crate::ctx::{handle_query, ServerCtx}; -use crate::forward::{build_https_client, parse_upstream_list, Upstream, UpstreamPool}; +use crate::forward::{ + build_https_client, build_odoh_client, parse_upstream_list, Upstream, UpstreamPool, +}; use crate::odoh::OdohConfigCache; use crate::override_store::OverrideStore; use crate::query_log::QueryLog; @@ -94,8 +96,11 @@ pub async fn run(config_path: String) -> crate::Result<()> { } crate::config::UpstreamMode::Odoh => { let odoh = config.upstream.odoh_upstream()?; - let client = build_https_client(); - let target_config = Arc::new(OdohConfigCache::new(odoh.target_host, client.clone())); + let client = build_odoh_client(&odoh); + let target_config = Arc::new(OdohConfigCache::new( + odoh.target_host.clone(), + client.clone(), + )); let primary = vec![Upstream::Odoh { relay_url: odoh.relay_url, target_path: odoh.target_path, @@ -222,7 +227,7 @@ pub async fn run(config_path: String) -> crate::Result<()> { upstream_port: config.upstream.port, lan_ip: Mutex::new(crate::lan::detect_lan_ip().unwrap_or(std::net::Ipv4Addr::LOCALHOST)), timeout: Duration::from_millis(config.upstream.timeout_ms), - hedge_delay: Duration::from_millis(config.upstream.hedge_ms), + hedge_delay: resolved_mode.hedge_delay(config.upstream.hedge_ms), proxy_tld_suffix: if config.proxy.tld.is_empty() { String::new() } else { -- 2.34.1 From a3cc64c94f6d7e53705455e4e384ac5b811eaa1e Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 15:44:20 +0300 Subject: [PATCH 107/139] feat(odoh): relay bind-address CLI arg + dashboard Outbound Wire panel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - `numa relay [PORT] [BIND]` accepts an optional bind address (defaults to 127.0.0.1, matching the Caddy reverse-proxy deployment shape). Required for Docker, where the relay needs 0.0.0.0 inside the container so Caddy can reach it across the bridge network. - Dashboard now surfaces the upstream_transport dimension as an "Outbound Wire" panel alongside the existing "Inbound Wire" (renamed from "Transport" for directional clarity). Sub-headers — "apps → numa" / "numa → internet" — make the threat-model split obvious without jargon. Bars: UDP/DoH/DoT/ODoH, headline "X% encrypted outbound". The PR description's promise that "the dashboard answers how much of my DNS traffic left in cleartext honestly" is now true. --- site/dashboard.html | 35 ++++++++++++++++++++++++++++++++--- src/main.rs | 15 +++++++++++++-- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/site/dashboard.html b/site/dashboard.html index fa2d965..710692b 100644 --- a/site/dashboard.html +++ b/site/dashboard.html @@ -228,6 +228,7 @@ body { .path-bar-fill.tcp { background: var(--violet); } .path-bar-fill.dot { background: var(--emerald); } .path-bar-fill.doh { background: var(--teal); } +.path-bar-fill.odoh { background: var(--violet-dim); } .path-pct { font-family: var(--font-mono); font-size: 0.75rem; @@ -637,16 +638,26 @@ body {
      - +
      - Transport + Inbound Wire apps → numa
      + +
      +
      + Outbound Wire numa → internet + +
      +
      +
      +
      +
      @@ -992,7 +1003,24 @@ function renderTransport(transport) { renderBarChart('transportBars', TRANSPORT_DEFS, transport, total); const encPct = encryptionPct(transport); const el = document.getElementById('transportEncrypted'); - el.textContent = `${encPct}% encrypted`; + el.textContent = `${encPct}% encrypted inbound`; + el.style.color = encPct >= 80 ? 'var(--emerald)' : encPct >= 50 ? 'var(--amber)' : 'var(--rose)'; +} + +const UPSTREAM_WIRE_DEFS = [ + { key: 'udp', label: 'UDP', cls: 'udp' }, + { key: 'doh', label: 'DoH', cls: 'doh' }, + { key: 'dot', label: 'DoT', cls: 'dot' }, + { key: 'odoh', label: 'ODoH', cls: 'odoh' }, +]; + +function renderUpstreamWire(ut) { + const total = (ut.udp + ut.doh + ut.dot + ut.odoh) || 0; + renderBarChart('upstreamWireBars', UPSTREAM_WIRE_DEFS, ut, total || 1); + const encrypted = ut.doh + ut.dot + ut.odoh; + const encPct = total > 0 ? Math.round((encrypted / total) * 100) : 0; + const el = document.getElementById('upstreamWireEncrypted'); + el.textContent = total > 0 ? `${encPct}% encrypted outbound` : ''; el.style.color = encPct >= 80 ? 'var(--emerald)' : encPct >= 50 ? 'var(--amber)' : 'var(--rose)'; } @@ -1234,6 +1262,7 @@ async function refresh() { // Panels renderPaths(q); renderTransport(stats.transport); + renderUpstreamWire(stats.upstream_transport || { udp: 0, doh: 0, dot: 0, odoh: 0 }); renderQueryLog(logs); renderOverrides(overrides); renderCache(cache); diff --git a/src/main.rs b/src/main.rs index e077a2f..8f9fecf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -66,7 +66,17 @@ fn main() -> numa::Result<()> { .as_deref() .and_then(|s| s.parse().ok()) .unwrap_or(8443); - let addr: std::net::SocketAddr = ([127, 0, 0, 1], port).into(); + let bind: std::net::IpAddr = std::env::args() + .nth(3) + .as_deref() + .map(|s| { + s.parse().unwrap_or_else(|e| { + eprintln!("invalid bind address '{}': {}", s, e); + std::process::exit(1); + }) + }) + .unwrap_or(std::net::IpAddr::V4(std::net::Ipv4Addr::LOCALHOST)); + let addr = std::net::SocketAddr::new(bind, port); eprintln!( "\x1b[1;38;2;192;98;58mNuma\x1b[0m — ODoH relay on {}\n", addr @@ -107,7 +117,8 @@ fn main() -> numa::Result<()> { eprintln!(" service status Check if the service is running"); eprintln!(" lan on Enable LAN service discovery (mDNS)"); eprintln!(" lan off Disable LAN service discovery"); - eprintln!(" relay [PORT] Run as an ODoH relay (RFC 9230, default port 8443)"); + eprintln!(" relay [PORT] [BIND]"); + eprintln!(" Run as an ODoH relay (RFC 9230, default 127.0.0.1:8443)"); eprintln!(" setup-phone Generate a QR code to install Numa DoT on a phone"); eprintln!(" help Show this help"); eprintln!(); -- 2.34.1 From be60f6ccbc33189d34bd5e02d894aec69ba6fe8c Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 15:44:29 +0300 Subject: [PATCH 108/139] chore(packaging): docker-compose + Caddyfile for ODoH relay deploy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two-container deploy: Caddy terminates TLS (auto-provisions Let's Encrypt via ACME) and reverse-proxies to a Numa relay on an internal Docker network. The relay never reads sealed payloads; Caddy's access log is discarded so per-request observability doesn't defeat the oblivious property. Validated against Hetzner CX22 + DNS at odoh-relay.numa.rs: - TLS-ALPN-01 challenge succeeded on first attempt - /health returned the relay's counter block - End-to-end ODoH client → relay → Cloudflare works Operators only need to: set a DNS A record, edit Caddyfile's hostname, docker compose up -d. README walks through the steps and the DNSCrypt v3/odoh-relays.md submission to claim a public listing. --- packaging/relay/Caddyfile | 15 ++++++++++ packaging/relay/README.md | 48 ++++++++++++++++++++++++++++++ packaging/relay/docker-compose.yml | 26 ++++++++++++++++ 3 files changed, 89 insertions(+) create mode 100644 packaging/relay/Caddyfile create mode 100644 packaging/relay/README.md create mode 100644 packaging/relay/docker-compose.yml diff --git a/packaging/relay/Caddyfile b/packaging/relay/Caddyfile new file mode 100644 index 0000000..ea368c8 --- /dev/null +++ b/packaging/relay/Caddyfile @@ -0,0 +1,15 @@ +odoh-relay.example.com { + handle /relay { + reverse_proxy numa-relay:8443 + } + handle /health { + reverse_proxy numa-relay:8443 + } + respond 404 + + # Per-request access logs defeat the point of an oblivious relay. + # Aggregate counters are exposed at /health on the relay itself. + log { + output discard + } +} diff --git a/packaging/relay/README.md b/packaging/relay/README.md new file mode 100644 index 0000000..373b263 --- /dev/null +++ b/packaging/relay/README.md @@ -0,0 +1,48 @@ +# Numa ODoH Relay — Docker deploy + +Two-container deploy: Caddy terminates TLS (auto-provisioning a Let's Encrypt +cert via ACME) and reverse-proxies to a Numa relay running on an internal +Docker network. The relay never reads sealed payloads; Caddy never logs them. + +## Prerequisites + +- A host with public 80/443 reachable from the internet. +- A DNS record (`A` or `AAAA`) pointing your chosen hostname at the host. +- Docker + Docker Compose v2. + +## Configure + +Edit `Caddyfile` and replace `odoh-relay.example.com` with your hostname. +That hostname is what ACME validates against and what ODoH clients will +configure as their relay URL: `https:///relay`. + +## Deploy + +```sh +docker compose up -d +docker compose logs -f caddy # watch ACME provisioning +``` + +First boot takes a few seconds while Caddy obtains the cert. Subsequent +restarts reuse the cached cert from the `caddy_data` volume. + +## Verify + +```sh +curl https:///health +# ok +# total 0 +# forwarded_ok 0 +# forwarded_err 0 +# rejected_bad_request 0 +``` + +Then point any ODoH client at `https:///relay` and watch the +counters tick. + +## Listing on the public ecosystem + +DNSCrypt's [v3/odoh-relays.md](https://github.com/DNSCrypt/dnscrypt-resolvers/blob/master/v3/odoh-relays.md) +is the canonical list. The pruned 2025-09-16 commit shows one public ODoH +relay survived the cull — running this compose file doubles global supply. +Open a PR there once your relay has been up for ~24 hours. diff --git a/packaging/relay/docker-compose.yml b/packaging/relay/docker-compose.yml new file mode 100644 index 0000000..9561535 --- /dev/null +++ b/packaging/relay/docker-compose.yml @@ -0,0 +1,26 @@ +services: + numa-relay: + image: ghcr.io/razvandimescu/numa:latest + command: ["relay", "8443", "0.0.0.0"] + restart: unless-stopped + networks: [internal] + + caddy: + image: caddy:2 + ports: + - "80:80" + - "443:443" + volumes: + - ./Caddyfile:/etc/caddy/Caddyfile:ro + - caddy_data:/data + - caddy_config:/config + restart: unless-stopped + depends_on: [numa-relay] + networks: [internal] + +networks: + internal: + +volumes: + caddy_data: + caddy_config: -- 2.34.1 From eb5ea3b645f0e64806fc5975fba3ea2c76e651a8 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 16:03:34 +0300 Subject: [PATCH 109/139] refactor(odoh): deduplicate post-audit findings - Hoist ODOH_CONTENT_TYPE to a single pub(crate) constant in odoh.rs; relay.rs imports it instead of declaring its own. - Generalize dashboard encryptionPct(data, encryptedKeys, allKeys) so both Inbound Wire and Outbound Wire panels share the same math instead of drifting independently. - Extract RelayState::new() and build_app() helpers in relay.rs so the test spawn_relay() and production run() wire the same router + body-limit layer. Prevents future middleware from landing in one path but not the other. All 344 lib tests pass; no behavior change. --- site/dashboard.html | 13 ++++++------ src/odoh.rs | 2 +- src/relay.rs | 49 ++++++++++++++++++++------------------------- 3 files changed, 30 insertions(+), 34 deletions(-) diff --git a/site/dashboard.html b/site/dashboard.html index 710692b..7b20e17 100644 --- a/site/dashboard.html +++ b/site/dashboard.html @@ -971,9 +971,11 @@ function renderBarChart(containerId, defs, data, total) { }).join(''); } -function encryptionPct(transport) { - const total = (transport.udp + transport.tcp + transport.dot + transport.doh) || 1; - return (((transport.dot + transport.doh) / total) * 100).toFixed(0); +function encryptionPct(data, encryptedKeys, allKeys) { + const total = allKeys.reduce((s, k) => s + (data[k] || 0), 0); + if (total === 0) return 0; + const encrypted = encryptedKeys.reduce((s, k) => s + (data[k] || 0), 0); + return Math.round((encrypted / total) * 100); } const PATH_DEFS = [ @@ -1001,7 +1003,7 @@ const TRANSPORT_DEFS = [ function renderTransport(transport) { const total = (transport.udp + transport.tcp + transport.dot + transport.doh) || 1; renderBarChart('transportBars', TRANSPORT_DEFS, transport, total); - const encPct = encryptionPct(transport); + const encPct = encryptionPct(transport, ['dot', 'doh'], ['udp', 'tcp', 'dot', 'doh']); const el = document.getElementById('transportEncrypted'); el.textContent = `${encPct}% encrypted inbound`; el.style.color = encPct >= 80 ? 'var(--emerald)' : encPct >= 50 ? 'var(--amber)' : 'var(--rose)'; @@ -1017,8 +1019,7 @@ const UPSTREAM_WIRE_DEFS = [ function renderUpstreamWire(ut) { const total = (ut.udp + ut.doh + ut.dot + ut.odoh) || 0; renderBarChart('upstreamWireBars', UPSTREAM_WIRE_DEFS, ut, total || 1); - const encrypted = ut.doh + ut.dot + ut.odoh; - const encPct = total > 0 ? Math.round((encrypted / total) * 100) : 0; + const encPct = encryptionPct(ut, ['doh', 'dot', 'odoh'], ['udp', 'doh', 'dot', 'odoh']); const el = document.getElementById('upstreamWireEncrypted'); el.textContent = total > 0 ? `${encPct}% encrypted outbound` : ''; el.style.color = encPct >= 80 ? 'var(--emerald)' : encPct >= 50 ? 'var(--amber)' : 'var(--rose)'; diff --git a/src/odoh.rs b/src/odoh.rs index 2cfa9c5..0901c94 100644 --- a/src/odoh.rs +++ b/src/odoh.rs @@ -25,7 +25,7 @@ use tokio::time::timeout; use crate::Result; /// MIME type used for both directions of the ODoH exchange (RFC 9230 §4). -const ODOH_CONTENT_TYPE: &str = "application/oblivious-dns-message"; +pub(crate) const ODOH_CONTENT_TYPE: &str = "application/oblivious-dns-message"; /// Cap on the response body we read into memory when the relay returns /// non-success. Protects against a hostile relay streaming a huge body on diff --git a/src/relay.rs b/src/relay.rs index 8d6ab40..122796e 100644 --- a/src/relay.rs +++ b/src/relay.rs @@ -20,10 +20,9 @@ use serde::Deserialize; use tokio::net::TcpListener; use crate::forward::build_https_client_with_pool; +use crate::odoh::ODOH_CONTENT_TYPE; use crate::Result; -const ODOH_CONTENT_TYPE: &str = "application/oblivious-dns-message"; - /// Cap on the opaque body we accept from a client. ODoH envelopes are /// ~100–300 bytes in practice; anything larger is malformed or hostile. const MAX_BODY_BYTES: usize = 4 * 1024; @@ -55,23 +54,30 @@ struct RelayState { rejected_bad_request: AtomicU64, } -pub async fn run(addr: SocketAddr) -> Result<()> { - let state = Arc::new(RelayState { - client: build_https_client_with_pool(RELAY_POOL_PER_HOST), - total_requests: AtomicU64::new(0), - forwarded_ok: AtomicU64::new(0), - forwarded_err: AtomicU64::new(0), - rejected_bad_request: AtomicU64::new(0), - }); +impl RelayState { + fn new() -> Arc { + Arc::new(RelayState { + client: build_https_client_with_pool(RELAY_POOL_PER_HOST), + total_requests: AtomicU64::new(0), + forwarded_ok: AtomicU64::new(0), + forwarded_err: AtomicU64::new(0), + rejected_bad_request: AtomicU64::new(0), + }) + } +} - let app = Router::new() +/// `DefaultBodyLimit` overrides axum's 2 MiB default so hostile clients +/// can't force the relay to buffer multi-MB bodies before our own cap. +fn build_app(state: Arc) -> Router { + Router::new() .route("/relay", post(handle_relay)) - // Overrides axum's default (2 MiB) so hostile clients can't force - // the relay to buffer multi-MB bodies before our own cap check. .layer(DefaultBodyLimit::max(MAX_BODY_BYTES)) .route("/health", get(handle_health)) - .with_state(state); + .with_state(state) +} +pub async fn run(addr: SocketAddr) -> Result<()> { + let app = build_app(RelayState::new()); let listener = TcpListener::bind(addr).await?; info!("ODoH relay listening on {}", addr); axum::serve(listener, app).await?; @@ -199,19 +205,8 @@ mod tests { let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let addr = listener.local_addr().unwrap(); - let state = Arc::new(RelayState { - client: build_https_client_with_pool(RELAY_POOL_PER_HOST), - total_requests: AtomicU64::new(0), - forwarded_ok: AtomicU64::new(0), - forwarded_err: AtomicU64::new(0), - rejected_bad_request: AtomicU64::new(0), - }); - - let app = Router::new() - .route("/relay", post(handle_relay)) - .layer(DefaultBodyLimit::max(MAX_BODY_BYTES)) - .route("/health", get(handle_health)) - .with_state(state.clone()); + let state = RelayState::new(); + let app = build_app(state.clone()); tokio::spawn(async move { let _ = axum::serve(listener, app).await; -- 2.34.1 From 07c321f7492209272792d487fa0882536c6c9b1d Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 17:07:31 +0300 Subject: [PATCH 110/139] chore(release): bump to v0.14.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Headline: ODoH (RFC 9230) — client + self-hosted relay. Set mode = "odoh" in [upstream] to seal queries before they leave the machine; run `numa relay` to add to the public ODoH ecosystem. --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2bfeaa6..b630e73 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1547,7 +1547,7 @@ dependencies = [ [[package]] name = "numa" -version = "0.13.1" +version = "0.14.0" dependencies = [ "arc-swap", "axum", diff --git a/Cargo.toml b/Cargo.toml index 15601c7..c22352b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "numa" -version = "0.13.1" +version = "0.14.0" authors = ["razvandimescu "] edition = "2021" description = "Portable DNS resolver in Rust — .numa local domains, ad blocking, developer overrides, DNS-over-HTTPS" -- 2.34.1 From cd6e686a1a8ae3ae2f621e62ca442dec08261a70 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 17:14:21 +0300 Subject: [PATCH 111/139] docs(readme): surface ODoH in the intro paragraph Adds the v0.14.0 capability where it's most differentiating: the first paragraph (sealed-query framing alongside the existing ad-blocking and .numa-domain pitches) and the second paragraph (numa relay as a public ODoH endpoint, with the DNSCrypt-list supply-doubling angle as fact). No reposition: tagline and structure unchanged. ODoH joins the existing capability set rather than displacing it. Hero GIF stays; will be re-recorded once the dashboard's Outbound Wire panel is worth showing in motion. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1728461..e5310de 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,9 @@ **DNS you own. Everywhere you go.** — [numa.rs](https://numa.rs) -A portable DNS resolver in a single binary. Block ads on any network, name your local services (`frontend.numa`), and override any hostname with auto-revert — all from your laptop, no cloud account or Raspberry Pi required. +A portable DNS resolver in a single binary. Block ads on any network, name your local services (`frontend.numa`), override any hostname with auto-revert, and seal every outbound query with **ODoH (RFC 9230)** so no single party sees both who you are and what you asked — all from your laptop, no cloud account or Raspberry Pi required. -Built from scratch in Rust. Zero DNS libraries. RFC 1035 wire protocol parsed by hand. Caching, ad blocking, and local service domains out of the box. Optional recursive resolution from root nameservers with full DNSSEC chain-of-trust validation, plus a DNS-over-TLS listener for encrypted client connections (iOS Private DNS, systemd-resolved, etc.). One ~8MB binary, everything embedded. +Built from scratch in Rust. Zero DNS libraries. RFC 1035 wire protocol parsed by hand. Caching, ad blocking, and local service domains out of the box. Optional recursive resolution from root nameservers with full DNSSEC chain-of-trust validation, plus a DNS-over-TLS listener for encrypted client connections (iOS Private DNS, systemd-resolved, etc.). Run `numa relay` and the same binary becomes a public ODoH endpoint too — the curated DNSCrypt list currently has one surviving relay, so every Numa deploy materially expands the ecosystem. One ~8MB binary, everything embedded. ![Numa dashboard](assets/hero-demo.gif) -- 2.34.1 From 4c685d1602db13ac82503abdc3930a487078b026 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 17:19:16 +0300 Subject: [PATCH 112/139] docs(readme): pamper readme still --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e5310de..905cd02 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ A portable DNS resolver in a single binary. Block ads on any network, name your local services (`frontend.numa`), override any hostname with auto-revert, and seal every outbound query with **ODoH (RFC 9230)** so no single party sees both who you are and what you asked — all from your laptop, no cloud account or Raspberry Pi required. -Built from scratch in Rust. Zero DNS libraries. RFC 1035 wire protocol parsed by hand. Caching, ad blocking, and local service domains out of the box. Optional recursive resolution from root nameservers with full DNSSEC chain-of-trust validation, plus a DNS-over-TLS listener for encrypted client connections (iOS Private DNS, systemd-resolved, etc.). Run `numa relay` and the same binary becomes a public ODoH endpoint too — the curated DNSCrypt list currently has one surviving relay, so every Numa deploy materially expands the ecosystem. One ~8MB binary, everything embedded. +Built from scratch in Rust. Zero DNS libraries. Caching, ad blocking, and local service domains out of the box. Optional recursive resolution from root nameservers with full DNSSEC chain-of-trust validation, plus a DNS-over-TLS listener for encrypted client connections (iOS Private DNS, systemd-resolved, etc.). Run `numa relay` and the same binary becomes a public ODoH endpoint too — the curated DNSCrypt list currently has one surviving relay, so every Numa deploy materially expands the ecosystem. One ~8MB binary, everything embedded. ![Numa dashboard](assets/hero-demo.gif) -- 2.34.1 From 193b38b85f2a842cbafa2ffccbc921e40319f9f1 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 18:46:54 +0300 Subject: [PATCH 113/139] feat(odoh): reject relay+target sharing an eTLD+1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plain host-string equality caught the copy-paste-same-URL footgun but let `r.cloudflare.com` + `odoh.cloudflare.com` through — two subdomains of the same operator collapse ODoH to ordinary DoH. Add a second layer: compare registrable domains via the PSL (`psl` crate) after the exact- host check. Fails open on IP literals and unparseable hosts; the exact- host check still runs in those cases. --- Cargo.lock | 16 +++++++++ Cargo.toml | 1 + src/config.rs | 97 ++++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 101 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b630e73..dc95f58 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1562,6 +1562,7 @@ dependencies = [ "hyper-util", "log", "odoh-rs", + "psl", "qrcode", "rand_core 0.9.5", "rcgen", @@ -1802,6 +1803,21 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psl" +version = "2.1.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76c0777260d32b76a8c3c197646707085d37e79d63b5872a29192c8d4f60f50b" +dependencies = [ + "psl-types", +] + +[[package]] +name = "psl-types" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33cb294fe86a74cbcf50d4445b37da762029549ebeea341421c7c70370f86cac" + [[package]] name = "qrcode" version = "0.14.1" diff --git a/Cargo.toml b/Cargo.toml index c22352b..ec3bb43 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ tokio-rustls = "0.26" arc-swap = "1" ring = "0.17" odoh-rs = "1" +psl = "2" # rand_core 0.9 matches the version odoh-rs (via hpke 0.13) depends on, so we # share one RngCore trait and OsRng impl across the dep tree. rand_core = { version = "0.9", features = ["os_rng"] } diff --git a/src/config.rs b/src/config.rs index 1205e37..3a41d24 100644 --- a/src/config.rs +++ b/src/config.rs @@ -263,25 +263,29 @@ impl UpstreamConfig { if relay_url.scheme() != "https" || target_url.scheme() != "https" { return Err("upstream.relay and upstream.target must both use https://".into()); } - if relay_url.host_str().is_none() || target_url.host_str().is_none() { - return Err("upstream.relay and upstream.target must include a host".into()); - } - if relay_url.host_str() == target_url.host_str() { - return Err(format!( - "upstream.relay and upstream.target resolve to the same host ({}); the privacy property requires distinct operators", - relay_url.host_str().unwrap_or("?") - ) - .into()); - } - let relay_host = relay_url .host_str() - .ok_or("upstream.relay has no host")? + .ok_or("upstream.relay must include a host")? .to_string(); let target_host = target_url .host_str() - .ok_or("upstream.target has no host")? + .ok_or("upstream.target must include a host")? .to_string(); + + if relay_host == target_host { + return Err(format!( + "upstream.relay and upstream.target resolve to the same host ({}); the privacy property requires distinct operators", + relay_host + ) + .into()); + } + if let Some(shared) = shared_registrable_domain(&relay_host, &target_host) { + return Err(format!( + "upstream.relay ({}) and upstream.target ({}) share the registrable domain ({}); the privacy property requires distinct operators", + relay_host, target_host, shared + ) + .into()); + } let target_path = if target_url.path().is_empty() { "/".to_string() } else { @@ -303,6 +307,20 @@ impl UpstreamConfig { } } +/// Returns the registrable domain (eTLD+1) shared by both hosts, if any. +/// Fails open on hosts the PSL can't parse (IP literals, bare TLDs). +fn shared_registrable_domain(relay_host: &str, target_host: &str) -> Option { + let relay = psl::domain(relay_host.as_bytes())?; + let target = psl::domain(target_host.as_bytes())?; + if relay.as_bytes() == target.as_bytes() { + std::str::from_utf8(relay.as_bytes()) + .ok() + .map(str::to_owned) + } else { + None + } +} + fn string_or_vec<'de, D>(deserializer: D) -> std::result::Result, D::Error> where D: serde::Deserializer<'de>, @@ -830,6 +848,59 @@ target = "https://odoh.example.com/dns-query" assert!(err.contains("same host"), "got: {err}"); } + #[test] + fn odoh_rejects_shared_registrable_domain() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://r.cloudflare.com/relay" +target = "https://odoh.cloudflare.com/dns-query" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let err = config.upstream.odoh_upstream().unwrap_err().to_string(); + assert!(err.contains("registrable domain"), "got: {err}"); + assert!(err.contains("cloudflare.com"), "got: {err}"); + } + + #[test] + fn odoh_rejects_shared_registrable_under_multi_label_suffix() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://a.foo.co.uk/relay" +target = "https://b.foo.co.uk/dns-query" +"#; + let config: Config = toml::from_str(toml).unwrap(); + let err = config.upstream.odoh_upstream().unwrap_err().to_string(); + assert!(err.contains("foo.co.uk"), "got: {err}"); + } + + #[test] + fn odoh_accepts_distinct_registrable_under_multi_label_suffix() { + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://relay.foo.co.uk/relay" +target = "https://target.bar.co.uk/dns-query" +"#; + let config: Config = toml::from_str(toml).unwrap(); + assert!(config.upstream.odoh_upstream().is_ok()); + } + + #[test] + fn odoh_accepts_distinct_private_psl_suffix_subdomains() { + // *.github.io is a public suffix, so foo.github.io and bar.github.io + // are independent registrable domains — accept. + let toml = r#" +[upstream] +mode = "odoh" +relay = "https://foo.github.io/relay" +target = "https://bar.github.io/dns-query" +"#; + let config: Config = toml::from_str(toml).unwrap(); + assert!(config.upstream.odoh_upstream().is_ok()); + } + #[test] fn odoh_rejects_non_https() { let toml = r#" -- 2.34.1 From 15978a78598805312a9c8f6ab9825ed17be4415a Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 19:04:15 +0300 Subject: [PATCH 114/139] fix(dashboard): pass missing args to encryptionPct in refresh() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit eb5ea3b generalised encryptionPct from (transport) to (data, encryptedKeys, allKeys) and updated renderTransport and renderUpstreamWire, but missed the call inside render() that computes the inline `~N/s · M% enc` QPS tag. With undefined allKeys, the first .reduce() threw TypeError and the render try/catch silently downgraded the whole dashboard to "disconnected" — every panel left empty even though /stats was returning real data. Fix the call site to match the other two (inbound-wire keys) and have the catch log to console so the next silent-failure regression shows up in DevTools within seconds instead of a source dive. --- site/dashboard.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/site/dashboard.html b/site/dashboard.html index 7b20e17..a0322a8 100644 --- a/site/dashboard.html +++ b/site/dashboard.html @@ -1244,7 +1244,7 @@ async function refresh() { // QPS calculation const now = Date.now(); - const encPct = encryptionPct(stats.transport); + const encPct = encryptionPct(stats.transport, ['dot', 'doh'], ['udp', 'tcp', 'dot', 'doh']); if (prevTotal !== null && prevTime !== null) { const dt = (now - prevTime) / 1000; const dq = q.total - prevTotal; @@ -1273,6 +1273,7 @@ async function refresh() { renderMemory(stats.memory, stats); } catch (err) { + console.error('[numa dashboard] render failed:', err); document.getElementById('statusDot').className = 'status-dot error'; document.getElementById('statusText').textContent = 'disconnected'; } -- 2.34.1 From 5b1642c6dc0143082948a60f77148a845f455d47 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 19:07:08 +0300 Subject: [PATCH 115/139] fix(blocklist): retry on transient download failures (#122) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On cold start, reqwest's getaddrinfo can race numa's own first-query cold-path latency — resolver timeout fires before numa warms its upstream DoH connection. Wrap each blocklist fetch in 3 retries with 2s/10s/30s backoff; by the second attempt, the upstream is warm and subsequent getaddrinfos succeed in <100ms. Also: parallelize fetches across lists via join_all (different hosts, no warming dependency), walk the full error source chain so reqwest failures surface the underlying cause, and parameterize retry delays for unit-test speed. --- src/blocklist.rs | 136 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 121 insertions(+), 15 deletions(-) diff --git a/src/blocklist.rs b/src/blocklist.rs index ef865c4..4d76eda 100644 --- a/src/blocklist.rs +++ b/src/blocklist.rs @@ -1,5 +1,5 @@ use std::collections::HashSet; -use std::time::Instant; +use std::time::{Duration, Instant}; use log::{info, warn}; @@ -355,27 +355,133 @@ mod tests { } } +const RETRY_DELAYS_SECS: &[u64] = &[2, 10, 30]; + pub async fn download_blocklists(lists: &[String]) -> Vec<(String, String)> { let client = reqwest::Client::builder() - .timeout(std::time::Duration::from_secs(30)) + .timeout(Duration::from_secs(30)) .gzip(true) .build() .unwrap_or_default(); - let mut results = Vec::new(); + let fetches = lists.iter().map(|url| { + let client = &client; + async move { + let text = fetch_with_retry(client, url).await?; + info!("downloaded blocklist: {} ({} bytes)", url, text.len()); + Some((url.clone(), text)) + } + }); + futures::future::join_all(fetches) + .await + .into_iter() + .flatten() + .collect() +} - for url in lists { - match client.get(url).send().await { - Ok(resp) => match resp.text().await { - Ok(text) => { - info!("downloaded blocklist: {} ({} bytes)", url, text.len()); - results.push((url.clone(), text)); - } - Err(e) => warn!("failed to read blocklist body {}: {}", url, e), - }, - Err(e) => warn!("failed to download blocklist {}: {}", url, e), +async fn fetch_with_retry(client: &reqwest::Client, url: &str) -> Option { + fetch_with_retry_delays(client, url, RETRY_DELAYS_SECS).await +} + +async fn fetch_with_retry_delays( + client: &reqwest::Client, + url: &str, + delays: &[u64], +) -> Option { + let total = delays.len() + 1; + for attempt in 1..=total { + match fetch_once(client, url).await { + Ok(text) => return Some(text), + Err(msg) if attempt < total => { + let delay = delays[attempt - 1]; + warn!( + "blocklist {} attempt {}/{} failed: {} — retrying in {}s", + url, attempt, total, msg, delay + ); + tokio::time::sleep(Duration::from_secs(delay)).await; + } + Err(msg) => { + warn!( + "blocklist {} attempt {}/{} failed: {} — giving up", + url, attempt, total, msg + ); + } } } - - results + None +} + +async fn fetch_once(client: &reqwest::Client, url: &str) -> Result { + let resp = client + .get(url) + .send() + .await + .map_err(|e| format_error_chain(&e))?; + resp.text().await.map_err(|e| format_error_chain(&e)) +} + +fn format_error_chain(e: &(dyn std::error::Error + 'static)) -> String { + let mut parts = vec![e.to_string()]; + let mut src = e.source(); + while let Some(s) = src { + parts.push(s.to_string()); + src = s.source(); + } + parts.join(": ") +} + +#[cfg(test)] +mod retry_tests { + use super::*; + use std::net::SocketAddr; + use tokio::io::{AsyncReadExt, AsyncWriteExt}; + use tokio::net::TcpListener; + + async fn flaky_http_server(fail_first: usize, body: &'static str) -> SocketAddr { + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + tokio::spawn(async move { + for _ in 0..fail_first { + if let Ok((sock, _)) = listener.accept().await { + drop(sock); + } + } + loop { + let Ok((mut sock, _)) = listener.accept().await else { + return; + }; + tokio::spawn(async move { + let mut buf = [0u8; 2048]; + let _ = sock.read(&mut buf).await; + let response = format!( + "HTTP/1.1 200 OK\r\nContent-Length: {}\r\nContent-Type: text/plain\r\nConnection: close\r\n\r\n{}", + body.len(), + body, + ); + let _ = sock.write_all(response.as_bytes()).await; + let _ = sock.shutdown().await; + }); + } + }); + addr + } + + #[tokio::test] + async fn retry_succeeds_after_transient_failure() { + let body = "ads.example.com\ntracker.example.net\n"; + let addr = flaky_http_server(2, body).await; + let client = reqwest::Client::new(); + let url = format!("http://{addr}/"); + let result = fetch_with_retry_delays(&client, &url, &[0, 0, 0]).await; + assert_eq!(result.as_deref(), Some(body)); + } + + #[tokio::test] + async fn retry_gives_up_when_all_attempts_fail() { + let addr = flaky_http_server(10, "").await; + let client = reqwest::Client::new(); + let url = format!("http://{addr}/"); + let result = fetch_with_retry_delays(&client, &url, &[0, 0, 0]).await; + assert_eq!(result, None); + } } -- 2.34.1 From 8bed7c46493ddab9a11eefbf0fea569064e8a0ac Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 19:11:53 +0300 Subject: [PATCH 116/139] test(blocklist): decouple retry tests from RETRY_DELAYS_SECS length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Derive both the flaky-server drop count and the zero-delay schedule from RETRY_DELAYS_SECS.len() so the tests keep exercising their intended invariants — "succeeds on final attempt" and "gives up after all attempts fail" — if the production retry schedule ever changes. Also: rename fail_first → drop_first_n to match drop(sock); swap the giveup test's empty body for an "unreachable" sentinel so a regression that accidentally served couldn't silently match Some(""). --- src/blocklist.rs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/blocklist.rs b/src/blocklist.rs index 4d76eda..20ac95d 100644 --- a/src/blocklist.rs +++ b/src/blocklist.rs @@ -437,11 +437,11 @@ mod retry_tests { use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::net::TcpListener; - async fn flaky_http_server(fail_first: usize, body: &'static str) -> SocketAddr { + async fn flaky_http_server(drop_first_n: usize, body: &'static str) -> SocketAddr { let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let addr = listener.local_addr().unwrap(); tokio::spawn(async move { - for _ in 0..fail_first { + for _ in 0..drop_first_n { if let Ok((sock, _)) = listener.accept().await { drop(sock); } @@ -466,22 +466,28 @@ mod retry_tests { addr } + fn zero_delays() -> Vec { + vec![0; RETRY_DELAYS_SECS.len()] + } + #[tokio::test] - async fn retry_succeeds_after_transient_failure() { + async fn retry_succeeds_on_final_attempt() { let body = "ads.example.com\ntracker.example.net\n"; - let addr = flaky_http_server(2, body).await; + let delays = zero_delays(); + let addr = flaky_http_server(delays.len(), body).await; let client = reqwest::Client::new(); let url = format!("http://{addr}/"); - let result = fetch_with_retry_delays(&client, &url, &[0, 0, 0]).await; + let result = fetch_with_retry_delays(&client, &url, &delays).await; assert_eq!(result.as_deref(), Some(body)); } #[tokio::test] async fn retry_gives_up_when_all_attempts_fail() { - let addr = flaky_http_server(10, "").await; + let delays = zero_delays(); + let addr = flaky_http_server(delays.len() + 2, "unreachable").await; let client = reqwest::Client::new(); let url = format!("http://{addr}/"); - let result = fetch_with_retry_delays(&client, &url, &[0, 0, 0]).await; + let result = fetch_with_retry_delays(&client, &url, &delays).await; assert_eq!(result, None); } } -- 2.34.1 From 60600b045f07567f301f12723a4f372851ec9df4 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Mon, 20 Apr 2026 19:27:06 +0300 Subject: [PATCH 117/139] chore: bump version to 0.14.1 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dc95f58..c7a8742 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1547,7 +1547,7 @@ dependencies = [ [[package]] name = "numa" -version = "0.14.0" +version = "0.14.1" dependencies = [ "arc-swap", "axum", diff --git a/Cargo.toml b/Cargo.toml index ec3bb43..39f75a2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "numa" -version = "0.14.0" +version = "0.14.1" authors = ["razvandimescu "] edition = "2021" description = "Portable DNS resolver in Rust — .numa local domains, ad blocking, developer overrides, DNS-over-HTTPS" -- 2.34.1 From 31adc31c9b64b38ff0ce7b3847d21afd7c96fbc2 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 21 Apr 2026 16:18:52 +0300 Subject: [PATCH 118/139] refactor(ctx): coalesce forward-path upstream queries resolve_coalesced now takes leader_path: QueryPath and applies to all three upstream branches (Forwarded-rule, Recursive, Upstream), not just Recursive. Fixes thundering-herd at boot when N concurrent HTTPS setups each trigger independent forward queries for the same upstream hostname. --- src/ctx.rs | 190 ++++++++++++++++++++++++++++------------------------- 1 file changed, 102 insertions(+), 88 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index 511b678..a0c15ac 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -209,106 +209,83 @@ pub async fn resolve_query( { // Conditional forwarding takes priority over recursive mode // (e.g. Tailscale .ts.net, VPC private zones) - upstream_transport = pool.preferred().map(|u| u.transport()); - match forward_with_failover_raw( - raw_wire, - pool, - &ctx.srtt, - ctx.timeout, - ctx.hedge_delay, - ) - .await - { - Ok(resp_wire) => match cache_and_parse(ctx, &qname, qtype, &resp_wire) { - Ok(resp) => (resp, QueryPath::Forwarded, DnssecStatus::Indeterminate), - Err(e) => { - error!("{} | {:?} {} | PARSE ERROR | {}", src_addr, qtype, qname, e); - ( - DnsPacket::response_from(&query, ResultCode::SERVFAIL), - QueryPath::UpstreamError, - DnssecStatus::Indeterminate, - ) - } - }, - Err(e) => { - error!( - "{} | {:?} {} | FORWARD ERROR | {}", - src_addr, qtype, qname, e - ); - ( - DnsPacket::response_from(&query, ResultCode::SERVFAIL), - QueryPath::UpstreamError, - DnssecStatus::Indeterminate, + let key = (qname.clone(), qtype); + let (resp, path, err) = resolve_coalesced( + &ctx.inflight, + key, + &query, + QueryPath::Forwarded, + || async { + let wire = forward_with_failover_raw( + raw_wire, + pool, + &ctx.srtt, + ctx.timeout, + ctx.hedge_delay, ) - } + .await?; + cache_and_parse(ctx, &qname, qtype, &wire) + }, + ) + .await; + log_coalesced_outcome(src_addr, qtype, &qname, path, err.as_deref(), "FORWARD"); + if path == QueryPath::Forwarded { + upstream_transport = pool.preferred().map(|u| u.transport()); } + (resp, path, DnssecStatus::Indeterminate) } else if ctx.upstream_mode == UpstreamMode::Recursive { // Recursive resolution makes UDP hops to roots/TLDs/auths; // tag as Udp so the dashboard can aggregate plaintext-wire // egress honestly. Only mark on success — errors stay None. let key = (qname.clone(), qtype); - let (resp, path, err) = resolve_coalesced(&ctx.inflight, key, &query, || { - crate::recursive::resolve_recursive( - &qname, - qtype, - &ctx.cache, - &query, - &ctx.root_hints, - &ctx.srtt, - ) - }) + let (resp, path, err) = resolve_coalesced( + &ctx.inflight, + key, + &query, + QueryPath::Recursive, + || { + crate::recursive::resolve_recursive( + &qname, + qtype, + &ctx.cache, + &query, + &ctx.root_hints, + &ctx.srtt, + ) + }, + ) .await; - if path == QueryPath::Coalesced { - debug!("{} | {:?} {} | COALESCED", src_addr, qtype, qname); - } else if path == QueryPath::UpstreamError { - error!( - "{} | {:?} {} | RECURSIVE ERROR | {}", - src_addr, - qtype, - qname, - err.as_deref().unwrap_or("leader failed") - ); - } else { + log_coalesced_outcome(src_addr, qtype, &qname, path, err.as_deref(), "RECURSIVE"); + if path == QueryPath::Recursive { upstream_transport = Some(crate::stats::UpstreamTransport::Udp); } (resp, path, DnssecStatus::Indeterminate) } else { let pool = ctx.upstream_pool.lock().unwrap().clone(); - match forward_with_failover_raw( - raw_wire, - &pool, - &ctx.srtt, - ctx.timeout, - ctx.hedge_delay, - ) - .await - { - Ok(resp_wire) => match cache_and_parse(ctx, &qname, qtype, &resp_wire) { - Ok(resp) => { - upstream_transport = pool.preferred().map(|u| u.transport()); - (resp, QueryPath::Upstream, DnssecStatus::Indeterminate) - } - Err(e) => { - error!("{} | {:?} {} | PARSE ERROR | {}", src_addr, qtype, qname, e); - ( - DnsPacket::response_from(&query, ResultCode::SERVFAIL), - QueryPath::UpstreamError, - DnssecStatus::Indeterminate, - ) - } - }, - Err(e) => { - error!( - "{} | {:?} {} | UPSTREAM ERROR | {}", - src_addr, qtype, qname, e - ); - ( - DnsPacket::response_from(&query, ResultCode::SERVFAIL), - QueryPath::UpstreamError, - DnssecStatus::Indeterminate, + let key = (qname.clone(), qtype); + let (resp, path, err) = resolve_coalesced( + &ctx.inflight, + key, + &query, + QueryPath::Upstream, + || async { + let wire = forward_with_failover_raw( + raw_wire, + &pool, + &ctx.srtt, + ctx.timeout, + ctx.hedge_delay, ) - } + .await?; + cache_and_parse(ctx, &qname, qtype, &wire) + }, + ) + .await; + log_coalesced_outcome(src_addr, qtype, &qname, path, err.as_deref(), "UPSTREAM"); + if path == QueryPath::Upstream { + upstream_transport = pool.preferred().map(|u| u.transport()); } + (resp, path, DnssecStatus::Indeterminate) } } }; @@ -611,11 +588,15 @@ fn acquire_inflight(inflight: &Mutex, key: (String, QueryType)) -> /// Run a resolve function with in-flight coalescing. Multiple concurrent calls /// for the same key share a single resolution — the first caller (leader) -/// executes `resolve_fn`, and followers wait for the broadcast result. +/// executes `resolve_fn`, and followers wait for the broadcast result. The +/// leader's successful path is tagged with `leader_path` so callers that +/// share this helper (recursive, forwarded-rule, forward-upstream) keep their +/// own observability without duplicating the inflight map. async fn resolve_coalesced( inflight: &Mutex, key: (String, QueryType), query: &DnsPacket, + leader_path: QueryPath, resolve_fn: F, ) -> (DnsPacket, QueryPath, Option) where @@ -644,7 +625,7 @@ where match result { Ok(resp) => { let _ = tx.send(Some(resp.clone())); - (resp, QueryPath::Recursive, None) + (resp, leader_path, None) } Err(e) => { let _ = tx.send(None); @@ -671,6 +652,33 @@ impl Drop for InflightGuard<'_> { } } +/// Emit the log lines shared by the three upstream branches (Forwarded, +/// Recursive, Upstream) after `resolve_coalesced` returns. Leader-success +/// and transport-tagging stay at the call site since they diverge per +/// branch, but the Coalesced debug and UpstreamError error are identical +/// except for the label. +fn log_coalesced_outcome( + src_addr: SocketAddr, + qtype: QueryType, + qname: &str, + path: QueryPath, + err: Option<&str>, + label: &str, +) { + match path { + QueryPath::Coalesced => debug!("{} | {:?} {} | COALESCED", src_addr, qtype, qname), + QueryPath::UpstreamError => error!( + "{} | {:?} {} | {} ERROR | {}", + src_addr, + qtype, + qname, + label, + err.unwrap_or("leader failed") + ), + _ => {} + } +} + fn special_use_response(query: &DnsPacket, qname: &str, qtype: QueryType) -> DnsPacket { use std::net::{Ipv4Addr, Ipv6Addr}; if qname == "ipv4only.arpa" { @@ -909,7 +917,7 @@ mod tests { let key = ("coalesce.test".to_string(), QueryType::A); let query = DnsPacket::query(100 + i, "coalesce.test", QueryType::A); handles.push(tokio::spawn(async move { - resolve_coalesced(&inf, key, &query, || async { + resolve_coalesced(&inf, key, &query, QueryPath::Recursive, || async { count.fetch_add(1, std::sync::atomic::Ordering::Relaxed); tokio::time::sleep(Duration::from_millis(200)).await; Ok(mock_response("coalesce.test")) @@ -953,6 +961,7 @@ mod tests { &inf1, ("same.domain".to_string(), QueryType::A), &query_a, + QueryPath::Recursive, || async { count1.fetch_add(1, std::sync::atomic::Ordering::Relaxed); tokio::time::sleep(Duration::from_millis(100)).await; @@ -966,6 +975,7 @@ mod tests { &inf2, ("same.domain".to_string(), QueryType::AAAA), &query_aaaa, + QueryPath::Recursive, || async { count2.fetch_add(1, std::sync::atomic::Ordering::Relaxed); tokio::time::sleep(Duration::from_millis(100)).await; @@ -995,6 +1005,7 @@ mod tests { &inflight, ("will-fail.test".to_string(), QueryType::A), &query, + QueryPath::Recursive, || async { Err::("upstream timeout".into()) }, ) .await; @@ -1016,6 +1027,7 @@ mod tests { &inf, ("fail.test".to_string(), QueryType::A), &query, + QueryPath::Recursive, || async { tokio::time::sleep(Duration::from_millis(200)).await; Err::("upstream error".into()) @@ -1056,6 +1068,7 @@ mod tests { &inflight, ("question.test".to_string(), QueryType::A), &query, + QueryPath::Recursive, || async { Err::("fail".into()) }, ) .await; @@ -1080,6 +1093,7 @@ mod tests { &inflight, ("err-msg.test".to_string(), QueryType::A), &query, + QueryPath::Recursive, || async { Err::("connection refused by upstream".into()) }, ) .await; -- 2.34.1 From 10469e96bd7b3ed1ab090e84db6c8cb8d97695d3 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 21 Apr 2026 16:19:14 +0300 Subject: [PATCH 119/139] fix(bootstrap): route numa HTTPS via IP-literal bootstrap resolver (#122) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When numa is its own system DNS resolver (HAOS add-on, Pi-hole-style container, /etc/resolv.conf → 127.0.0.1), every numa-originated HTTPS connection — DoH upstream, ODoH relay/target, blocklist CDN — routed its hostname through getaddrinfo() back to numa itself. Cold boot deadlocked; steady state taxed every new TCP connection. 0.14.1's retry-with-backoff masked the startup race but not the underlying self-loop. NumaResolver implements reqwest::dns::Resolve with two lanes: - Per-host overrides (ODoH relay_ip/target_ip) short-circuit DNS entirely, preserving ODoH's zero-plain-DNS-leak property. - Otherwise: A+AAAA in parallel via UDP to IP-literal bootstrap servers, with TCP fallback for UDP-hostile networks. Bootstrap IPs come from upstream.fallback (IP-literal filtered, hostnames skipped with a warning). Empty fallback yields the hardcoded default [9.9.9.9, 1.1.1.1]; the chosen source is logged at startup so the silent default is visible. doh_keepalive_loop now fires its first tick immediately, and keepalive_doh logs failures at WARN — bootstrap issues surface within ~100ms of boot instead of on the first client query. Distinct from UpstreamPool.fallback (client-query failover) which stays untouched: client queries with no configured fallback still SERVFAIL on primary failure rather than silently shadow-routing. Reproducer: tests/docker/self-resolver-loop.sh. Before: 0 blocklist domains, 3072ms SERVFAIL. After: 397k domains, 118ms NOERROR. --- benches/recursive_compare.rs | 10 +- src/blocklist.rs | 15 +- src/bootstrap_resolver.rs | 225 +++++++++++++++++++++++++++++ src/config.rs | 22 ++- src/forward.rs | 64 +++++--- src/lib.rs | 1 + src/serve.rs | 61 ++++++-- tests/docker/self-resolver-loop.sh | 155 ++++++++++++++++++++ 8 files changed, 505 insertions(+), 48 deletions(-) create mode 100644 src/bootstrap_resolver.rs create mode 100755 tests/docker/self-resolver-loop.sh diff --git a/benches/recursive_compare.rs b/benches/recursive_compare.rs index 74f9576..4b9152c 100644 --- a/benches/recursive_compare.rs +++ b/benches/recursive_compare.rs @@ -383,7 +383,7 @@ fn run_default(rt: &tokio::runtime::Runtime) { /// Library-to-library: Numa forward_query_raw vs Hickory resolver.lookup. fn run_direct(rt: &tokio::runtime::Runtime) { - let upstream = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse"); + let upstream = numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse"); let resolver = rt.block_on(build_hickory_resolver()); let timeout = Duration::from_secs(10); @@ -609,9 +609,9 @@ fn run_hedge_multi(rt: &tokio::runtime::Runtime, iterations: usize) { DOMAINS.len() ); - let primary = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse"); - let primary_dual = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse"); - let secondary_dual = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse"); + let primary = numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse"); + let primary_dual = numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse"); + let secondary_dual = numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse"); let resolver = rt.block_on(build_hickory_resolver()); println!("Warming up..."); @@ -810,7 +810,7 @@ fn run_diag(rt: &tokio::runtime::Runtime) { fn run_diag_clients(rt: &tokio::runtime::Runtime) { println!("Client diagnostic: reqwest vs Hickory (20 queries to {DOH_UPSTREAM})\n"); - let upstream = numa::forward::parse_upstream(DOH_UPSTREAM, 443).expect("failed to parse"); + let upstream = numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse"); let resolver = rt.block_on(build_hickory_resolver()); let timeout = Duration::from_secs(10); diff --git a/src/blocklist.rs b/src/blocklist.rs index 20ac95d..87b43ed 100644 --- a/src/blocklist.rs +++ b/src/blocklist.rs @@ -357,12 +357,17 @@ mod tests { const RETRY_DELAYS_SECS: &[u64] = &[2, 10, 30]; -pub async fn download_blocklists(lists: &[String]) -> Vec<(String, String)> { - let client = reqwest::Client::builder() +pub async fn download_blocklists( + lists: &[String], + resolver: Option>, +) -> Vec<(String, String)> { + let mut builder = reqwest::Client::builder() .timeout(Duration::from_secs(30)) - .gzip(true) - .build() - .unwrap_or_default(); + .gzip(true); + if let Some(r) = resolver { + builder = builder.dns_resolver(r); + } + let client = builder.build().unwrap_or_default(); let fetches = lists.iter().map(|url| { let client = &client; diff --git a/src/bootstrap_resolver.rs b/src/bootstrap_resolver.rs new file mode 100644 index 0000000..fce5e4a --- /dev/null +++ b/src/bootstrap_resolver.rs @@ -0,0 +1,225 @@ +//! `reqwest` DNS resolver used by numa-originated HTTPS (DoH upstream, ODoH +//! relay/target, blocklist CDN). When numa is its own system resolver +//! (`/etc/resolv.conf → 127.0.0.1`, HAOS add-on, Pi-hole-style container), +//! the default `getaddrinfo` path loops back through numa before numa can +//! answer — a chicken-and-egg that deadlocks cold boot. See issue #122 and +//! `docs/implementation/bootstrap-resolver.md`. +//! +//! Resolution order per hostname: +//! 1. Per-hostname overrides (e.g. ODoH `relay_ip` / `target_ip`) → return +//! immediately, no DNS query. Preserves ODoH's "zero plain-DNS leak" +//! property for configured endpoints. +//! 2. Otherwise, query A + AAAA in parallel via UDP to IP-literal bootstrap +//! servers, with TCP fallback on UDP timeout (for networks that block +//! outbound UDP:53 — see memory: `project_network_udp_hostile.md`). + +use std::collections::HashMap; +use std::net::{IpAddr, Ipv4Addr, SocketAddr}; +use std::time::Duration; + +use log::{debug, info, warn}; +use reqwest::dns::{Addrs, Name, Resolve, Resolving}; + +use crate::forward::{forward_tcp, forward_udp}; +use crate::packet::DnsPacket; +use crate::question::QueryType; +use crate::record::DnsRecord; + +const UDP_TIMEOUT: Duration = Duration::from_millis(800); +const TCP_TIMEOUT: Duration = Duration::from_millis(1500); +const DEFAULT_BOOTSTRAP: &[SocketAddr] = &[ + SocketAddr::new(IpAddr::V4(Ipv4Addr::new(9, 9, 9, 9)), 53), + SocketAddr::new(IpAddr::V4(Ipv4Addr::new(1, 1, 1, 1)), 53), +]; + +pub struct NumaResolver { + bootstrap: Vec, + overrides: HashMap>, +} + +impl NumaResolver { + /// Build a resolver from the configured `upstream.fallback` list and any + /// per-hostname overrides (e.g. ODoH's `relay_ip`/`target_ip`). + /// + /// `fallback` entries are filtered to IP literals only — hostnames would + /// re-introduce the self-loop inside the resolver itself. Empty or + /// unusable fallback yields the hardcoded default (Quad9 + Cloudflare). + pub fn new(fallback: &[String], overrides: HashMap>) -> Self { + let mut bootstrap: Vec = Vec::with_capacity(fallback.len()); + for entry in fallback { + match crate::forward::parse_upstream_addr(entry, 53) { + Ok(addr) => bootstrap.push(addr), + Err(_) => { + warn!( + "bootstrap_resolver: skipping non-IP fallback '{}' \ + (hostnames would re-enter the self-loop)", + entry + ); + } + } + } + let source = if bootstrap.is_empty() { + bootstrap = DEFAULT_BOOTSTRAP.to_vec(); + "default (no IP-literal in upstream.fallback)" + } else { + "upstream.fallback" + }; + let ips: Vec = bootstrap.iter().map(|s| s.ip().to_string()).collect(); + info!( + "bootstrap resolver: {} via {} — used for numa-originated HTTPS hostname resolution", + ips.join(", "), + source + ); + Self { + bootstrap, + overrides, + } + } + + #[cfg(test)] + pub fn bootstrap(&self) -> &[SocketAddr] { + &self.bootstrap + } +} + +impl Resolve for NumaResolver { + fn resolve(&self, name: Name) -> Resolving { + let hostname = name.as_str().to_string(); + + if let Some(ips) = self.overrides.get(&hostname) { + let addrs: Vec = + ips.iter().map(|ip| SocketAddr::new(*ip, 0)).collect(); + debug!( + "bootstrap_resolver: override hit for {} → {:?}", + hostname, ips + ); + return Box::pin( + async move { Ok(Box::new(addrs.into_iter()) as Addrs) }, + ); + } + + let bootstrap = self.bootstrap.clone(); + Box::pin(async move { + let addrs = resolve_via_bootstrap(&hostname, &bootstrap).await?; + debug!( + "bootstrap_resolver: resolved {} → {} addr(s)", + hostname, + addrs.len() + ); + Ok(Box::new(addrs.into_iter()) as Addrs) + }) + } +} + +async fn resolve_via_bootstrap( + hostname: &str, + bootstrap: &[SocketAddr], +) -> Result, Box> { + let mut last_err: Option = None; + for &server in bootstrap { + let q_a = DnsPacket::query(0xBEEF, hostname, QueryType::A); + let q_aaaa = DnsPacket::query(0xBEF0, hostname, QueryType::AAAA); + let (a_res, aaaa_res) = tokio::join!( + query_with_tcp_fallback(&q_a, server), + query_with_tcp_fallback(&q_aaaa, server), + ); + + let mut out = Vec::new(); + match a_res { + Ok(pkt) => extract_addrs(&pkt, &mut out), + Err(e) => last_err = Some(format!("{} A failed: {}", server, e)), + } + match aaaa_res { + Ok(pkt) => extract_addrs(&pkt, &mut out), + // AAAA is optional — many hosts return NXDOMAIN/empty. Don't + // treat as the primary error if A succeeded. + Err(e) => debug!("bootstrap {} AAAA for {} failed: {}", server, hostname, e), + } + if !out.is_empty() { + return Ok(out); + } + } + Err(last_err + .unwrap_or_else(|| "no bootstrap servers reachable".into()) + .into()) +} + +async fn query_with_tcp_fallback(query: &DnsPacket, server: SocketAddr) -> crate::Result { + match forward_udp(query, server, UDP_TIMEOUT).await { + Ok(pkt) => Ok(pkt), + Err(e) => { + debug!( + "bootstrap UDP {} failed ({}), falling back to TCP", + server, e + ); + forward_tcp(query, server, TCP_TIMEOUT).await + } + } +} + +fn extract_addrs(pkt: &DnsPacket, out: &mut Vec) { + for r in &pkt.answers { + match r { + DnsRecord::A { addr, .. } => out.push(SocketAddr::new(IpAddr::V4(*addr), 0)), + DnsRecord::AAAA { addr, .. } => out.push(SocketAddr::new(IpAddr::V6(*addr), 0)), + _ => {} + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::net::{Ipv4Addr, Ipv6Addr}; + + #[test] + fn empty_fallback_uses_defaults() { + let r = NumaResolver::new(&[], HashMap::new()); + let got: Vec = r.bootstrap().iter().map(|s| s.to_string()).collect(); + assert_eq!(got, vec!["9.9.9.9:53", "1.1.1.1:53"]); + } + + #[test] + fn fallback_accepts_ip_literals_only() { + let fallback = vec![ + "9.9.9.9".to_string(), + "dns.quad9.net".to_string(), + "1.1.1.1:5353".to_string(), + ]; + let r = NumaResolver::new(&fallback, HashMap::new()); + let got: Vec = r.bootstrap().iter().map(|s| s.to_string()).collect(); + assert_eq!(got, vec!["9.9.9.9:53", "1.1.1.1:5353"]); + } + + #[test] + fn override_returns_configured_ips_without_dns() { + let mut overrides = HashMap::new(); + overrides.insert( + "odoh-relay.example".to_string(), + vec![IpAddr::V4(Ipv4Addr::new(178, 104, 229, 30))], + ); + let r = NumaResolver::new(&[], overrides); + let name: Name = "odoh-relay.example".parse().unwrap(); + let fut = r.resolve(name); + let res = futures::executor::block_on(fut).unwrap(); + let addrs: Vec<_> = res.collect(); + assert_eq!(addrs.len(), 1); + assert_eq!(addrs[0].ip(), IpAddr::V4(Ipv4Addr::new(178, 104, 229, 30))); + } + + #[test] + fn override_supports_multiple_ips_including_ipv6() { + let mut overrides = HashMap::new(); + overrides.insert( + "dual.example".to_string(), + vec![ + IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)), + IpAddr::V6(Ipv6Addr::LOCALHOST), + ], + ); + let r = NumaResolver::new(&[], overrides); + let res = futures::executor::block_on(r.resolve("dual.example".parse().unwrap())).unwrap(); + let addrs: Vec<_> = res.collect(); + assert_eq!(addrs.len(), 2); + } +} diff --git a/src/config.rs b/src/config.rs index 3a41d24..272e6c6 100644 --- a/src/config.rs +++ b/src/config.rs @@ -56,7 +56,7 @@ impl ForwardingRuleConfig { } let mut primary = Vec::with_capacity(self.upstream.len()); for s in &self.upstream { - let u = crate::forward::parse_upstream(s, 53) + let u = crate::forward::parse_upstream(s, 53, None) .map_err(|e| format!("forwarding rule for upstream '{}': {}", s, e))?; primary.push(u); } @@ -241,6 +241,26 @@ pub struct OdohUpstream { pub target_bootstrap: Option, } +impl OdohUpstream { + /// Per-host IP overrides for the bootstrap resolver, lifted from + /// `relay_ip`/`target_ip`. Keeps the "zero plain-DNS leak for ODoH + /// endpoints" property when numa is its own system resolver. + pub fn host_ip_overrides(&self) -> std::collections::HashMap> { + let mut out = std::collections::HashMap::new(); + if let Some(addr) = self.relay_bootstrap { + out.entry(self.relay_host.clone()) + .or_insert_with(Vec::new) + .push(addr.ip()); + } + if let Some(addr) = self.target_bootstrap { + out.entry(self.target_host.clone()) + .or_insert_with(Vec::new) + .push(addr.ip()); + } + out + } +} + impl UpstreamConfig { /// Validate and extract ODoH-specific fields. Called during `load_config` /// so misconfigured ODoH fails fast at startup, the same care we take diff --git a/src/forward.rs b/src/forward.rs index 530f1ed..892e5b6 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -113,7 +113,7 @@ impl fmt::Display for Upstream { } } -pub(crate) fn parse_upstream_addr( +pub fn parse_upstream_addr( s: &str, default_port: u16, ) -> std::result::Result { @@ -129,19 +129,28 @@ pub(crate) fn parse_upstream_addr( } /// Parse a slice of upstream address strings into `Upstream` values, failing -/// on the first invalid entry. -pub fn parse_upstream_list(addrs: &[String], default_port: u16) -> Result> { +/// on the first invalid entry. DoH entries use `resolver` (when provided) as +/// their hostname resolver. +pub fn parse_upstream_list( + addrs: &[String], + default_port: u16, + resolver: Option>, +) -> Result> { addrs .iter() - .map(|s| parse_upstream(s, default_port)) + .map(|s| parse_upstream(s, default_port, resolver.clone())) .collect() } -pub fn parse_upstream(s: &str, default_port: u16) -> Result { +pub fn parse_upstream( + s: &str, + default_port: u16, + resolver: Option>, +) -> Result { if s.starts_with("https://") { return Ok(Upstream::Doh { url: s.to_string(), - client: build_https_client(), + client: build_https_client_with_resolver(1, resolver), }); } // tls://IP:PORT#hostname or tls://IP#hostname (default port 853) @@ -163,12 +172,16 @@ pub fn parse_upstream(s: &str, default_port: u16) -> Result { } /// HTTP/2 client tuned for DoH/ODoH: small windows for low latency, long-lived -/// keep-alive. Shared by the DoH upstream and the ODoH config-fetcher + -/// seal/open path. Pool defaults to one idle conn per host — good for -/// resolvers that talk to a single upstream; relays that fan out to many -/// targets should use [`build_https_client_with_pool`]. +/// keep-alive. Pool defaults to one idle conn per host — good for resolvers +/// that talk to a single upstream; relays that fan out to many targets +/// should use [`build_https_client_with_pool`]. +/// +/// Uses the system resolver. Callers running inside `serve::run` pass the +/// shared [`crate::bootstrap_resolver::NumaResolver`] via +/// [`build_https_client_with_resolver`] to avoid the self-loop documented +/// in `docs/implementation/bootstrap-resolver.md`. pub fn build_https_client() -> reqwest::Client { - build_https_client_with_pool(1) + build_https_client_with_resolver(1, None) } /// Same shape as [`build_https_client`], but caller picks @@ -176,20 +189,18 @@ pub fn build_https_client() -> reqwest::Client { /// and benefit from a larger pool so warm connections survive concurrent /// fan-out. pub fn build_https_client_with_pool(pool_max_idle_per_host: usize) -> reqwest::Client { - https_client_builder(pool_max_idle_per_host) - .build() - .unwrap_or_default() + build_https_client_with_resolver(pool_max_idle_per_host, None) } -/// HTTPS client for the ODoH upstream, with bootstrap-IP overrides applied -/// so relay/target hostname resolution can bypass system DNS. -pub fn build_odoh_client(odoh: &crate::config::OdohUpstream) -> reqwest::Client { - let mut builder = https_client_builder(1); - if let Some(addr) = odoh.relay_bootstrap { - builder = builder.resolve(&odoh.relay_host, addr); - } - if let Some(addr) = odoh.target_bootstrap { - builder = builder.resolve(&odoh.target_host, addr); +/// [`build_https_client`] with an optional custom DNS resolver. Numa wires +/// [`crate::bootstrap_resolver::NumaResolver`] here. +pub fn build_https_client_with_resolver( + pool_max_idle_per_host: usize, + resolver: Option>, +) -> reqwest::Client { + let mut builder = https_client_builder(pool_max_idle_per_host); + if let Some(r) = resolver { + builder = builder.dns_resolver(r); } builder.build().unwrap_or_default() } @@ -553,6 +564,9 @@ async fn forward_doh_raw( /// Send a lightweight keepalive query to a DoH upstream to prevent /// the HTTP/2 + TLS connection from going idle and being torn down. +/// The first call doubles as a startup warm-up: bootstrap-resolver failures +/// (unreachable Quad9/Cloudflare defaults, misconfigured hostname upstream) +/// surface here rather than on the first client query. pub async fn keepalive_doh(upstream: &Upstream) { if let Upstream::Doh { url, client } = upstream { // Query for . NS — minimal, always succeeds, response is small @@ -565,7 +579,9 @@ pub async fn keepalive_doh(upstream: &Upstream) { 0x00, 0x02, // type NS 0x00, 0x01, // class IN ]; - let _ = forward_doh_raw(wire, url, client, Duration::from_secs(5)).await; + if let Err(e) = forward_doh_raw(wire, url, client, Duration::from_secs(5)).await { + log::warn!("DoH keepalive to {} failed: {}", url, e); + } } } diff --git a/src/lib.rs b/src/lib.rs index aec568d..9b6af11 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ pub mod api; pub mod blocklist; +pub mod bootstrap_resolver; pub mod buffer; pub mod cache; pub mod config; diff --git a/src/serve.rs b/src/serve.rs index 9b4b587..1aa1fdb 100644 --- a/src/serve.rs +++ b/src/serve.rs @@ -13,13 +13,12 @@ use log::{error, info}; use tokio::net::UdpSocket; use crate::blocklist::{download_blocklists, parse_blocklist, BlocklistStore}; +use crate::bootstrap_resolver::NumaResolver; use crate::buffer::BytePacketBuffer; use crate::cache::DnsCache; use crate::config::{build_zone_map, load_config, ConfigLoad}; use crate::ctx::{handle_query, ServerCtx}; -use crate::forward::{ - build_https_client, build_odoh_client, parse_upstream_list, Upstream, UpstreamPool, -}; +use crate::forward::{build_https_client_with_resolver, parse_upstream_list, Upstream, UpstreamPool}; use crate::odoh::OdohConfigCache; use crate::override_store::OverrideStore; use crate::query_log::QueryLog; @@ -48,6 +47,23 @@ pub async fn run(config_path: String) -> crate::Result<()> { (dummy, "recursive (root hints)".to_string()) }; + // Routes numa-originated HTTPS (DoH upstream, ODoH relay/target, blocklist + // CDN) away from the system resolver so lookups don't loop back through + // numa when it's its own system DNS. + // See `docs/implementation/bootstrap-resolver.md`. + let resolver_overrides = match config.upstream.mode { + crate::config::UpstreamMode::Odoh => config + .upstream + .odoh_upstream() + .map(|o| o.host_ip_overrides()) + .unwrap_or_default(), + _ => std::collections::HashMap::new(), + }; + let bootstrap_resolver: Arc = Arc::new(NumaResolver::new( + &config.upstream.fallback, + resolver_overrides, + )); + let (resolved_mode, upstream_auto, pool, upstream_label) = match config.upstream.mode { crate::config::UpstreamMode::Auto => { info!("auto mode: probing recursive resolution..."); @@ -57,7 +73,7 @@ pub async fn run(config_path: String) -> crate::Result<()> { (crate::config::UpstreamMode::Recursive, false, pool, label) } else { log::warn!("recursive probe failed — falling back to Quad9 DoH"); - let client = build_https_client(); + let client = build_https_client_with_resolver(1, Some(bootstrap_resolver.clone())); let url = DOH_FALLBACK.to_string(); let label = url.clone(); let pool = UpstreamPool::new(vec![Upstream::Doh { url, client }], vec![]); @@ -82,8 +98,16 @@ pub async fn run(config_path: String) -> crate::Result<()> { config.upstream.address.clone() }; - let primary = parse_upstream_list(&addrs, config.upstream.port)?; - let fallback = parse_upstream_list(&config.upstream.fallback, config.upstream.port)?; + let primary = parse_upstream_list( + &addrs, + config.upstream.port, + Some(bootstrap_resolver.clone()), + )?; + let fallback = parse_upstream_list( + &config.upstream.fallback, + config.upstream.port, + Some(bootstrap_resolver.clone()), + )?; let pool = UpstreamPool::new(primary, fallback); let label = pool.label(); @@ -96,7 +120,7 @@ pub async fn run(config_path: String) -> crate::Result<()> { } crate::config::UpstreamMode::Odoh => { let odoh = config.upstream.odoh_upstream()?; - let client = build_odoh_client(&odoh); + let client = build_https_client_with_resolver(1, Some(bootstrap_resolver.clone())); let target_config = Arc::new(OdohConfigCache::new( odoh.target_host.clone(), client.clone(), @@ -110,7 +134,11 @@ pub async fn run(config_path: String) -> crate::Result<()> { let fallback = if odoh.strict { Vec::new() } else { - parse_upstream_list(&config.upstream.fallback, config.upstream.port)? + parse_upstream_list( + &config.upstream.fallback, + config.upstream.port, + Some(bootstrap_resolver.clone()), + )? }; let pool = UpstreamPool::new(primary, fallback); let label = pool.label(); @@ -405,8 +433,9 @@ pub async fn run(config_path: String) -> crate::Result<()> { if config.blocking.enabled && !blocklist_lists.is_empty() { let bl_ctx = Arc::clone(&ctx); let bl_lists = blocklist_lists.clone(); + let bl_resolver = bootstrap_resolver.clone(); tokio::spawn(async move { - load_blocklists(&bl_ctx, &bl_lists).await; + load_blocklists(&bl_ctx, &bl_lists, Some(bl_resolver.clone())).await; // Periodic refresh let mut interval = tokio::time::interval(Duration::from_secs(refresh_hours * 3600)); @@ -414,7 +443,7 @@ pub async fn run(config_path: String) -> crate::Result<()> { loop { interval.tick().await; info!("refreshing blocklists..."); - load_blocklists(&bl_ctx, &bl_lists).await; + load_blocklists(&bl_ctx, &bl_lists, Some(bl_resolver.clone())).await; } }); } @@ -596,8 +625,12 @@ async fn network_watch_loop(ctx: Arc) { } } -async fn load_blocklists(ctx: &ServerCtx, lists: &[String]) { - let downloaded = download_blocklists(lists).await; +async fn load_blocklists( + ctx: &ServerCtx, + lists: &[String], + resolver: Option>, +) { + let downloaded = download_blocklists(lists, resolver).await; // Parse outside the lock to avoid blocking DNS queries during parse (~100ms) let mut all_domains = std::collections::HashSet::new(); @@ -632,8 +665,10 @@ async fn warm_domain(ctx: &ServerCtx, domain: &str) { } async fn doh_keepalive_loop(ctx: Arc) { + // First tick fires immediately so we surface bootstrap-resolver failures + // (unreachable Quad9/Cloudflare, blocked :53, bad upstream hostname) in + // the startup logs instead of on the first client query. let mut interval = tokio::time::interval(Duration::from_secs(25)); - interval.tick().await; // skip first immediate tick loop { interval.tick().await; let pool = ctx.upstream_pool.lock().unwrap().clone(); diff --git a/tests/docker/self-resolver-loop.sh b/tests/docker/self-resolver-loop.sh new file mode 100755 index 0000000..400b12c --- /dev/null +++ b/tests/docker/self-resolver-loop.sh @@ -0,0 +1,155 @@ +#!/usr/bin/env bash +# +# Reproducer for issue #122 — chicken-and-egg when numa is its own system +# resolver (HAOS add-on, Pi-hole-style container, laptop with +# resolv.conf → 127.0.0.1). +# +# Topology: +# container /etc/resolv.conf → nameserver 127.0.0.1 +# numa bound on :53 → upstream DoH by hostname (quad9) +# numa boots → spawns blocklist download +# reqwest::get → getaddrinfo("cdn.jsdelivr.net") +# → loopback UDP :53 → numa → cache miss → DoH upstream +# → getaddrinfo("dns.quad9.net") → same loop → glibc EAI_AGAIN +# +# Expected on master: both assertions FAIL (bug reproduced). +# Expected after bootstrap-IP fix: both assertions PASS. +# +# Requirements: docker (with internet access for external lists/DoH) +# Usage: ./tests/docker/self-resolver-loop.sh + +set -euo pipefail + +cd "$(dirname "$0")/../.." + +GREEN="\033[32m"; RED="\033[31m"; RESET="\033[0m" + +pass() { printf " ${GREEN}✓${RESET} %s\n" "$1"; } +fail() { printf " ${RED}✗${RESET} %s\n" "$1"; printf " %s\n" "$2"; FAILED=$((FAILED+1)); } +FAILED=0 + +OUT=/tmp/numa-self-resolver.out + +echo "── self-resolver-loop: building + reproducing on debian:bookworm ──" +echo " (first run is slow: image pull + cold cargo build, ~5-8 min)" +echo + +docker run --rm \ + -v "$PWD:/src:ro" \ + -v numa-self-resolver-cargo:/root/.cargo \ + -v numa-self-resolver-target:/work/target \ + debian:bookworm bash -c ' +set -e + +# Phase 1: install deps + build with the container DNS as given by Docker +# (resolves deb.debian.org, static.rust-lang.org, crates.io). +apt-get update -qq && apt-get install -y -qq curl build-essential dnsutils 2>&1 | tail -3 + +if ! command -v cargo &>/dev/null; then + curl -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --quiet +fi +. "$HOME/.cargo/env" + +mkdir -p /work +tar -C /src --exclude=./target --exclude=./.git -cf - . | tar -C /work -xf - +cd /work + +echo "── cargo build --release --locked ──" +cargo build --release --locked 2>&1 | tail -5 +echo + +# Phase 2: flip system DNS to numa itself — this is the pathological +# topology from issue #122 (HAOS add-on, resolv.conf → 127.0.0.1). +# Everything after this point, any getaddrinfo call inside numa loops +# back through :53. +echo "nameserver 127.0.0.1" > /etc/resolv.conf +echo "── /etc/resolv.conf inside container (post-flip) ──" +cat /etc/resolv.conf +echo + +cat > /tmp/numa.toml < /tmp/numa.log 2>&1 & +NUMA_PID=$! + +# Wait up to 120s for blocklist to populate. +# Retry delays 2+10+30s = 42s, plus ~4 × ~10s getaddrinfo timeouts under +# self-loop = ~82s worst case. 120s leaves headroom. +LOADED=0 +for i in $(seq 1 120); do + LOADED=$(curl -sf http://127.0.0.1:5380/blocking/stats 2>/dev/null \ + | grep -o "\"domains_loaded\":[0-9]*" | cut -d: -f2 || echo 0) + [ "${LOADED:-0}" -gt 100 ] && break + sleep 1 +done + +# First cold DoH query — time it. +START=$(date +%s%N) +dig @127.0.0.1 example.com A +time=15 +tries=1 > /tmp/dig.out 2>&1 || true +END=$(date +%s%N) +LATENCY_MS=$(( (END - START) / 1000000 )) +STATUS=$(grep -oE "status: [A-Z]+" /tmp/dig.out | head -1 || echo "status: TIMEOUT") + +kill $NUMA_PID 2>/dev/null || true +wait $NUMA_PID 2>/dev/null || true + +echo +echo "=== RESULT ===" +echo "domains_loaded=$LOADED" +echo "first_query_latency_ms=$LATENCY_MS" +echo "first_query_${STATUS// /_}" +echo +echo "=== numa.log (tail 40) ===" +tail -40 /tmp/numa.log +echo +echo "=== dig.out ===" +cat /tmp/dig.out +' 2>&1 | tee "$OUT" + +echo +echo "── assertions ──" + +LOADED=$(grep '^domains_loaded=' "$OUT" | tail -1 | cut -d= -f2 || echo 0) +LATENCY=$(grep '^first_query_latency_ms=' "$OUT" | tail -1 | cut -d= -f2 || echo 999999) +STATUS_LINE=$(grep '^first_query_status_' "$OUT" | tail -1 || echo "first_query_status_TIMEOUT") + +if [ "${LOADED:-0}" -gt 100 ]; then + pass "blocklist downloaded (domains_loaded=$LOADED)" +else + fail "blocklist downloaded (got domains_loaded=${LOADED:-0}, expected >100)" \ + "chicken-and-egg: blocklist HTTPS client has no DNS bootstrap; getaddrinfo loops through numa" +fi + +if [ "${LATENCY:-999999}" -lt 2000 ]; then + pass "first DoH query under 2s (latency=${LATENCY}ms, $STATUS_LINE)" +else + fail "first DoH query under 2s (got ${LATENCY}ms, $STATUS_LINE)" \ + "self-loop on getaddrinfo(upstream_host); plain DoH needs bootstrap-IP symmetry with ODoH" +fi + +echo +if [ "$FAILED" -eq 0 ]; then + printf "${GREEN}── self-resolver-loop passed (fix is in place) ──${RESET}\n" + exit 0 +else + printf "${RED}── self-resolver-loop failed ($FAILED assertion(s)) — bug #122 reproduced ──${RESET}\n" + exit 1 +fi -- 2.34.1 From 459395203d5b64deda9d071f0b3cec8ec89c2911 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 21 Apr 2026 16:30:26 +0300 Subject: [PATCH 120/139] style: cargo fmt --- benches/recursive_compare.rs | 6 ++++-- src/bootstrap_resolver.rs | 12 +++++------ src/ctx.rs | 39 +++++++++++------------------------- src/forward.rs | 5 +---- src/serve.rs | 10 ++++----- 5 files changed, 27 insertions(+), 45 deletions(-) diff --git a/benches/recursive_compare.rs b/benches/recursive_compare.rs index 4b9152c..7649ab0 100644 --- a/benches/recursive_compare.rs +++ b/benches/recursive_compare.rs @@ -610,8 +610,10 @@ fn run_hedge_multi(rt: &tokio::runtime::Runtime, iterations: usize) { ); let primary = numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse"); - let primary_dual = numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse"); - let secondary_dual = numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse"); + let primary_dual = + numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse"); + let secondary_dual = + numa::forward::parse_upstream(DOH_UPSTREAM, 443, None).expect("failed to parse"); let resolver = rt.block_on(build_hickory_resolver()); println!("Warming up..."); diff --git a/src/bootstrap_resolver.rs b/src/bootstrap_resolver.rs index fce5e4a..1cf5c2e 100644 --- a/src/bootstrap_resolver.rs +++ b/src/bootstrap_resolver.rs @@ -87,15 +87,12 @@ impl Resolve for NumaResolver { let hostname = name.as_str().to_string(); if let Some(ips) = self.overrides.get(&hostname) { - let addrs: Vec = - ips.iter().map(|ip| SocketAddr::new(*ip, 0)).collect(); + let addrs: Vec = ips.iter().map(|ip| SocketAddr::new(*ip, 0)).collect(); debug!( "bootstrap_resolver: override hit for {} → {:?}", hostname, ips ); - return Box::pin( - async move { Ok(Box::new(addrs.into_iter()) as Addrs) }, - ); + return Box::pin(async move { Ok(Box::new(addrs.into_iter()) as Addrs) }); } let bootstrap = self.bootstrap.clone(); @@ -144,7 +141,10 @@ async fn resolve_via_bootstrap( .into()) } -async fn query_with_tcp_fallback(query: &DnsPacket, server: SocketAddr) -> crate::Result { +async fn query_with_tcp_fallback( + query: &DnsPacket, + server: SocketAddr, +) -> crate::Result { match forward_udp(query, server, UDP_TIMEOUT).await { Ok(pkt) => Ok(pkt), Err(e) => { diff --git a/src/ctx.rs b/src/ctx.rs index a0c15ac..0d39f7d 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -210,12 +210,8 @@ pub async fn resolve_query( // Conditional forwarding takes priority over recursive mode // (e.g. Tailscale .ts.net, VPC private zones) let key = (qname.clone(), qtype); - let (resp, path, err) = resolve_coalesced( - &ctx.inflight, - key, - &query, - QueryPath::Forwarded, - || async { + let (resp, path, err) = + resolve_coalesced(&ctx.inflight, key, &query, QueryPath::Forwarded, || async { let wire = forward_with_failover_raw( raw_wire, pool, @@ -225,9 +221,8 @@ pub async fn resolve_query( ) .await?; cache_and_parse(ctx, &qname, qtype, &wire) - }, - ) - .await; + }) + .await; log_coalesced_outcome(src_addr, qtype, &qname, path, err.as_deref(), "FORWARD"); if path == QueryPath::Forwarded { upstream_transport = pool.preferred().map(|u| u.transport()); @@ -238,12 +233,8 @@ pub async fn resolve_query( // tag as Udp so the dashboard can aggregate plaintext-wire // egress honestly. Only mark on success — errors stay None. let key = (qname.clone(), qtype); - let (resp, path, err) = resolve_coalesced( - &ctx.inflight, - key, - &query, - QueryPath::Recursive, - || { + let (resp, path, err) = + resolve_coalesced(&ctx.inflight, key, &query, QueryPath::Recursive, || { crate::recursive::resolve_recursive( &qname, qtype, @@ -252,9 +243,8 @@ pub async fn resolve_query( &ctx.root_hints, &ctx.srtt, ) - }, - ) - .await; + }) + .await; log_coalesced_outcome(src_addr, qtype, &qname, path, err.as_deref(), "RECURSIVE"); if path == QueryPath::Recursive { upstream_transport = Some(crate::stats::UpstreamTransport::Udp); @@ -263,12 +253,8 @@ pub async fn resolve_query( } else { let pool = ctx.upstream_pool.lock().unwrap().clone(); let key = (qname.clone(), qtype); - let (resp, path, err) = resolve_coalesced( - &ctx.inflight, - key, - &query, - QueryPath::Upstream, - || async { + let (resp, path, err) = + resolve_coalesced(&ctx.inflight, key, &query, QueryPath::Upstream, || async { let wire = forward_with_failover_raw( raw_wire, &pool, @@ -278,9 +264,8 @@ pub async fn resolve_query( ) .await?; cache_and_parse(ctx, &qname, qtype, &wire) - }, - ) - .await; + }) + .await; log_coalesced_outcome(src_addr, qtype, &qname, path, err.as_deref(), "UPSTREAM"); if path == QueryPath::Upstream { upstream_transport = pool.preferred().map(|u| u.transport()); diff --git a/src/forward.rs b/src/forward.rs index 892e5b6..e3f307b 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -113,10 +113,7 @@ impl fmt::Display for Upstream { } } -pub fn parse_upstream_addr( - s: &str, - default_port: u16, -) -> std::result::Result { +pub fn parse_upstream_addr(s: &str, default_port: u16) -> std::result::Result { // Try full socket addr first: "1.2.3.4:5353" or "[::1]:5353" if let Ok(addr) = s.parse::() { return Ok(addr); diff --git a/src/serve.rs b/src/serve.rs index 1aa1fdb..288f6f8 100644 --- a/src/serve.rs +++ b/src/serve.rs @@ -18,7 +18,9 @@ use crate::buffer::BytePacketBuffer; use crate::cache::DnsCache; use crate::config::{build_zone_map, load_config, ConfigLoad}; use crate::ctx::{handle_query, ServerCtx}; -use crate::forward::{build_https_client_with_resolver, parse_upstream_list, Upstream, UpstreamPool}; +use crate::forward::{ + build_https_client_with_resolver, parse_upstream_list, Upstream, UpstreamPool, +}; use crate::odoh::OdohConfigCache; use crate::override_store::OverrideStore; use crate::query_log::QueryLog; @@ -625,11 +627,7 @@ async fn network_watch_loop(ctx: Arc) { } } -async fn load_blocklists( - ctx: &ServerCtx, - lists: &[String], - resolver: Option>, -) { +async fn load_blocklists(ctx: &ServerCtx, lists: &[String], resolver: Option>) { let downloaded = download_blocklists(lists, resolver).await; // Parse outside the lock to avoid blocking DNS queries during parse (~100ms) -- 2.34.1 From 51cce0347bbaf845c63957827553c78963a08376 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 21 Apr 2026 17:35:59 +0300 Subject: [PATCH 121/139] test(odoh): integration-verify relay_ip/target_ip override wiring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suite 8 now ends with a config using RFC 5737 TEST-NET-1 IPs as relay_ip/target_ip, started briefly so the bootstrap resolver logs its override map. Asserts both host=IP pairs land in that map — closing the gap flagged on PR #126 (zero-plain-DNS-leak for ODoH endpoints was only unit-tested). Also: NumaResolver::new now logs the override map at INFO when non-empty, so operators can verify their ODoH bootstrap without needing DEBUG level. --- src/bootstrap_resolver.rs | 11 ++++++++++ tests/integration.sh | 46 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/src/bootstrap_resolver.rs b/src/bootstrap_resolver.rs index 1cf5c2e..94b03ea 100644 --- a/src/bootstrap_resolver.rs +++ b/src/bootstrap_resolver.rs @@ -70,6 +70,17 @@ impl NumaResolver { ips.join(", "), source ); + if !overrides.is_empty() { + let mut pairs: Vec = overrides + .iter() + .flat_map(|(host, ips)| ips.iter().map(move |ip| format!("{}={}", host, ip))) + .collect(); + pairs.sort(); + info!( + "bootstrap resolver: host overrides (skip DNS, connect direct): {}", + pairs.join(", ") + ); + } Self { bootstrap, overrides, diff --git a/tests/integration.sh b/tests/integration.sh index 77b874f..1773c11 100755 --- a/tests/integration.sh +++ b/tests/integration.sh @@ -975,6 +975,52 @@ check "Same-host relay+target rejected at startup" \ "same host" \ "$STARTUP_OUT" +# relay_ip / target_ip must land in the bootstrap resolver's override map, +# so reqwest connects direct to the configured IPs instead of resolving the +# hostnames via plain DNS (ODoH's zero-plain-DNS-leak property). Using +# RFC 5737 TEST-NET-1 IPs — never routable, so the OdohConfigCache won't +# actually connect, but the override-map wiring is visible in the startup log. +cat > "$CONFIG" << 'CONF' +[server] +bind_addr = "127.0.0.1:5354" +api_port = 5381 + +[upstream] +mode = "odoh" +relay = "https://odoh-relay.example.com/proxy" +target = "https://odoh-target.example.org/dns-query" +relay_ip = "192.0.2.1" +target_ip = "192.0.2.2" + +[cache] +max_entries = 10000 + +[blocking] +enabled = false + +[proxy] +enabled = false +CONF + +RUST_LOG=info "$BINARY" "$CONFIG" > "$LOG" 2>&1 & +NUMA_PID=$! +for _ in $(seq 1 30); do + curl -sf "http://127.0.0.1:$API_PORT/health" >/dev/null 2>&1 && break + sleep 0.1 +done + +OVERRIDE_LOG=$(grep 'bootstrap resolver: host overrides' "$LOG" || true) +check "relay_ip wired into bootstrap override map" \ + "odoh-relay.example.com=192.0.2.1" \ + "$OVERRIDE_LOG" +check "target_ip wired into bootstrap override map" \ + "odoh-target.example.org=192.0.2.2" \ + "$OVERRIDE_LOG" + +kill "$NUMA_PID" 2>/dev/null || true +wait "$NUMA_PID" 2>/dev/null || true +sleep 1 + fi # end Suite 8 # ---- Suite 9: Numa's own ODoH relay (--relay-mode) ---- -- 2.34.1 From 5cba02a6c8d2fb78ac9a68dc31ed897581e32fbd Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Tue, 21 Apr 2026 18:06:22 +0300 Subject: [PATCH 122/139] refactor(bootstrap): BTreeMap for overrides + simplify review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Switch overrides from HashMap to BTreeMap — deterministic iteration by type, drops the manual sort when logging. - Rename the flat_map closure's inner `ips` to `addrs` to stop shadowing the outer Vec. - Trim the Suite 8 TEST-NET-1 comment to keep the "why" and drop mechanism narration. - Drop a redundant sleep 1 after wait — wait already blocks on exit. --- src/bootstrap_resolver.rs | 19 +++++++++---------- src/config.rs | 4 ++-- src/serve.rs | 2 +- tests/integration.sh | 10 ++++------ 4 files changed, 16 insertions(+), 19 deletions(-) diff --git a/src/bootstrap_resolver.rs b/src/bootstrap_resolver.rs index 94b03ea..c3be8bd 100644 --- a/src/bootstrap_resolver.rs +++ b/src/bootstrap_resolver.rs @@ -13,7 +13,7 @@ //! servers, with TCP fallback on UDP timeout (for networks that block //! outbound UDP:53 — see memory: `project_network_udp_hostile.md`). -use std::collections::HashMap; +use std::collections::BTreeMap; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; use std::time::Duration; @@ -34,7 +34,7 @@ const DEFAULT_BOOTSTRAP: &[SocketAddr] = &[ pub struct NumaResolver { bootstrap: Vec, - overrides: HashMap>, + overrides: BTreeMap>, } impl NumaResolver { @@ -44,7 +44,7 @@ impl NumaResolver { /// `fallback` entries are filtered to IP literals only — hostnames would /// re-introduce the self-loop inside the resolver itself. Empty or /// unusable fallback yields the hardcoded default (Quad9 + Cloudflare). - pub fn new(fallback: &[String], overrides: HashMap>) -> Self { + pub fn new(fallback: &[String], overrides: BTreeMap>) -> Self { let mut bootstrap: Vec = Vec::with_capacity(fallback.len()); for entry in fallback { match crate::forward::parse_upstream_addr(entry, 53) { @@ -71,11 +71,10 @@ impl NumaResolver { source ); if !overrides.is_empty() { - let mut pairs: Vec = overrides + let pairs: Vec = overrides .iter() - .flat_map(|(host, ips)| ips.iter().map(move |ip| format!("{}={}", host, ip))) + .flat_map(|(host, addrs)| addrs.iter().map(move |ip| format!("{}={}", host, ip))) .collect(); - pairs.sort(); info!( "bootstrap resolver: host overrides (skip DNS, connect direct): {}", pairs.join(", ") @@ -185,7 +184,7 @@ mod tests { #[test] fn empty_fallback_uses_defaults() { - let r = NumaResolver::new(&[], HashMap::new()); + let r = NumaResolver::new(&[], BTreeMap::new()); let got: Vec = r.bootstrap().iter().map(|s| s.to_string()).collect(); assert_eq!(got, vec!["9.9.9.9:53", "1.1.1.1:53"]); } @@ -197,14 +196,14 @@ mod tests { "dns.quad9.net".to_string(), "1.1.1.1:5353".to_string(), ]; - let r = NumaResolver::new(&fallback, HashMap::new()); + let r = NumaResolver::new(&fallback, BTreeMap::new()); let got: Vec = r.bootstrap().iter().map(|s| s.to_string()).collect(); assert_eq!(got, vec!["9.9.9.9:53", "1.1.1.1:5353"]); } #[test] fn override_returns_configured_ips_without_dns() { - let mut overrides = HashMap::new(); + let mut overrides = BTreeMap::new(); overrides.insert( "odoh-relay.example".to_string(), vec![IpAddr::V4(Ipv4Addr::new(178, 104, 229, 30))], @@ -220,7 +219,7 @@ mod tests { #[test] fn override_supports_multiple_ips_including_ipv6() { - let mut overrides = HashMap::new(); + let mut overrides = BTreeMap::new(); overrides.insert( "dual.example".to_string(), vec![ diff --git a/src/config.rs b/src/config.rs index 272e6c6..6daf430 100644 --- a/src/config.rs +++ b/src/config.rs @@ -245,8 +245,8 @@ impl OdohUpstream { /// Per-host IP overrides for the bootstrap resolver, lifted from /// `relay_ip`/`target_ip`. Keeps the "zero plain-DNS leak for ODoH /// endpoints" property when numa is its own system resolver. - pub fn host_ip_overrides(&self) -> std::collections::HashMap> { - let mut out = std::collections::HashMap::new(); + pub fn host_ip_overrides(&self) -> std::collections::BTreeMap> { + let mut out = std::collections::BTreeMap::new(); if let Some(addr) = self.relay_bootstrap { out.entry(self.relay_host.clone()) .or_insert_with(Vec::new) diff --git a/src/serve.rs b/src/serve.rs index 288f6f8..c76d174 100644 --- a/src/serve.rs +++ b/src/serve.rs @@ -59,7 +59,7 @@ pub async fn run(config_path: String) -> crate::Result<()> { .odoh_upstream() .map(|o| o.host_ip_overrides()) .unwrap_or_default(), - _ => std::collections::HashMap::new(), + _ => std::collections::BTreeMap::new(), }; let bootstrap_resolver: Arc = Arc::new(NumaResolver::new( &config.upstream.fallback, diff --git a/tests/integration.sh b/tests/integration.sh index 1773c11..76b1dab 100755 --- a/tests/integration.sh +++ b/tests/integration.sh @@ -975,11 +975,10 @@ check "Same-host relay+target rejected at startup" \ "same host" \ "$STARTUP_OUT" -# relay_ip / target_ip must land in the bootstrap resolver's override map, -# so reqwest connects direct to the configured IPs instead of resolving the -# hostnames via plain DNS (ODoH's zero-plain-DNS-leak property). Using -# RFC 5737 TEST-NET-1 IPs — never routable, so the OdohConfigCache won't -# actually connect, but the override-map wiring is visible in the startup log. +# Guards ODoH's zero-plain-DNS-leak property: relay_ip / target_ip must +# land in the bootstrap resolver's override map so reqwest connects direct +# to the configured IPs instead of resolving the hostnames via plain DNS. +# RFC 5737 TEST-NET-1 IPs (unroutable). cat > "$CONFIG" << 'CONF' [server] bind_addr = "127.0.0.1:5354" @@ -1019,7 +1018,6 @@ check "target_ip wired into bootstrap override map" \ kill "$NUMA_PID" 2>/dev/null || true wait "$NUMA_PID" 2>/dev/null || true -sleep 1 fi # end Suite 8 -- 2.34.1 From 5ba19e04c8d64d78a336f99e7711d06587b016f2 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 22 Apr 2026 15:49:58 +0300 Subject: [PATCH 123/139] chore: gitignore local Claude Code harness state .claude/ holds per-session harness files (settings.local.json, task locks, worktree metadata). None of it belongs in the repo. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index acfc601..129a76e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ /target /build-dir CLAUDE.md +.claude/ docs/ site/blog/posts/ ios/ -- 2.34.1 From 640b64bf7e1be666d2eb645708924f0cc7f2c1b9 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 22 Apr 2026 15:50:21 +0300 Subject: [PATCH 124/139] chore(site): live-reload dev server via chokidar + browser-sync Replaces the plain python3 http.server + one-shot make blog with a watcher pipeline: chokidar regenerates HTML on MD/template changes, browser-sync serves the site and reloads the browser on rendered-asset changes. First run downloads both via npx; subsequent runs are instant. Preflight checks for npx and pandoc. Port arg parsing is tolerant of legacy --drafts flag ordering (drafts are always included now, since that's what the dev loop actually wants). Cleanup trap kills the watcher on exit so re-runs don't leave orphans. --- scripts/serve-site.sh | 45 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/scripts/serve-site.sh b/scripts/serve-site.sh index 23854ff..18fc4a9 100755 --- a/scripts/serve-site.sh +++ b/scripts/serve-site.sh @@ -1,14 +1,41 @@ #!/usr/bin/env bash +# Dev server for site/: regenerates drafts on each MD change, reloads the +# browser on each rendered HTML/CSS/JS change. Port is the first numeric arg +# (default 9000); any other args are ignored for back-compat. +# +# First run downloads chokidar-cli + browser-sync into the npm cache — slow +# once, instant after that. + set -euo pipefail -PORT="${1:-9000}" +PORT=9000 +for arg in "$@"; do + if [[ "$arg" =~ ^[0-9]+$ ]]; then + PORT="$arg" + break + fi +done -if [[ "${1:-}" == "--drafts" ]] || [[ "${2:-}" == "--drafts" ]]; then - PORT="${PORT//--drafts/9000}" # default port if --drafts was first arg - make blog-drafts -else - make blog -fi +command -v npx >/dev/null || { echo "npx not found. Install Node.js: https://nodejs.org" >&2; exit 1; } +command -v pandoc >/dev/null || { echo "pandoc not found (required by 'make blog-drafts')." >&2; exit 1; } -echo "Serving site at http://localhost:$PORT" -cd site && python3 -m http.server "$PORT" +# Initial render so the first page load has everything. +make blog-drafts + +echo "Serving site at http://localhost:$PORT (drafts included, live reload)" + +# Kill child processes on exit so re-runs don't leave orphaned watchers. +trap 'kill $(jobs -p) 2>/dev/null' EXIT INT TERM + +# Regenerate HTML when MD sources or the blog template change. +npx --yes chokidar-cli \ + "drafts/*.md" "blog/*.md" "site/blog-template.html" \ + -c "make blog-drafts" & + +# Serve + reload on rendered-asset changes. +cd site && exec npx --yes browser-sync start \ + --server . \ + --port "$PORT" \ + --files "**/*.html,**/*.css,**/*.js" \ + --no-open \ + --no-notify -- 2.34.1 From df2062882c4983d70348ba3132db5960f9e414e1 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 22 Apr 2026 16:42:10 +0300 Subject: [PATCH 125/139] chore: bump rustls-webpki to 0.103.13 for RUSTSEC-2026-0104 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Advisory published 2026-04-22: reachable panic in certificate revocation list parsing. Patch is a lockfile-only bump — transitive via rustls, no direct dep changes. Unblocks cargo audit in CI across all open PRs. --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c7a8742..1da534a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2130,9 +2130,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.12" +version = "0.103.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8279bb85272c9f10811ae6a6c547ff594d6a7f3c6c6b02ee9726d1d0dcfcdd06" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" dependencies = [ "aws-lc-rs", "ring", -- 2.34.1 From 2e461ccc0f71098427fec0f21fdf153c9966d65f Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 22 Apr 2026 15:49:39 +0300 Subject: [PATCH 126/139] docs(config): add ODoH upstream examples with relay_ip/target_ip pinning Complements the bootstrap resolver fix (#122, #126) by documenting the ODoH knobs in the commented config template. Explains relay_ip/target_ip as the way to prevent plain-DNS leaks of the relay/target hostnames via the bootstrap resolver on cold boot when numa is its own system DNS. --- numa.toml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/numa.toml b/numa.toml index c25654a..93418ea 100644 --- a/numa.toml +++ b/numa.toml @@ -22,6 +22,7 @@ api_port = 5380 # [upstream] # mode = "forward" # "forward" (default) — relay to upstream # # "recursive" — resolve from root hints (no address needed) +# # "odoh" — Oblivious DoH (see ODoH block below) # address = "9.9.9.9" # single upstream (plain UDP) # address = ["192.168.1.1", "9.9.9.9:5353"] # multiple upstreams — SRTT picks fastest # address = "https://dns.quad9.net/dns-query" # DNS-over-HTTPS (encrypted) @@ -34,6 +35,22 @@ api_port = 5380 # # to the same upstream. Rescues packet loss (UDP), # # dispatch spikes (DoH), TLS stalls (DoT). # # Set to 0 to disable. Default: 10 + +# ODoH (Oblivious DNS-over-HTTPS, RFC 9230). The relay sees your IP but +# not the question; the target sees the question but not your IP. Numa +# refuses same-operator relay+target configs by default (eTLD+1 check). +# [upstream] +# mode = "odoh" +# relay = "https://odoh-relay.numa.rs/proxy" +# target = "https://odoh.cloudflare-dns.com/dns-query" +# strict = true # default: refuse to downgrade to `fallback` +# # on relay failure. Set false to allow a +# # non-oblivious fallback path. +# relay_ip = "178.104.229.30" # optional: pin IPs so numa doesn't leak the +# target_ip = "104.16.249.249" # relay/target hostnames via the bootstrap +# # resolver on cold boot when numa is its +# # own system DNS. See docs/implementation/ +# # bootstrap-resolver.md. # root_hints = [ # only used in recursive mode # "198.41.0.4", # a.root-servers.net (Verisign) # "199.9.14.201", # b.root-servers.net (USC-ISI) -- 2.34.1 From 26b1cd5917a9909cc2e28b23311db7d7e89005cb Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 22 Apr 2026 15:50:13 +0300 Subject: [PATCH 127/139] feat(packaging): ODoH client Docker deploy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Single-container docker-compose recipe for running numa in ODoH client mode. Ships with a starter numa.toml pointing at odoh-relay.numa.rs paired with Cloudflare's ODoH target — two independent operators with distinct eTLD+1s, so the default passes numa's same-operator check. Exposes :53 UDP+TCP for LAN clients and :5380 for the dashboard + REST API. README covers prerequisites, deploy, verification, and the ODoH privacy boundary (relay sees IP, target sees query, neither sees both). Advertised alongside packaging/relay/ in the main README Docker section. --- README.md | 4 ++ packaging/client/README.md | 72 +++++++++++++++++++++++++++++ packaging/client/docker-compose.yml | 15 ++++++ packaging/client/numa.toml | 23 +++++++++ 4 files changed, 114 insertions(+) create mode 100644 packaging/client/README.md create mode 100644 packaging/client/docker-compose.yml create mode 100644 packaging/client/numa.toml diff --git a/README.md b/README.md index 905cd02..3632638 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,10 @@ docker run -d --name numa --network host \ Multi-arch: `linux/amd64` and `linux/arm64`. +Turnkey compose recipes: +- [`packaging/client/`](packaging/client/) — ODoH client mode (anonymous DNS), Numa + starter `numa.toml`. +- [`packaging/relay/`](packaging/relay/) — public ODoH relay, Numa + Caddy + ACME. + ## How It Compares | | Pi-hole | AdGuard Home | Unbound | Numa | diff --git a/packaging/client/README.md b/packaging/client/README.md new file mode 100644 index 0000000..f6e76c0 --- /dev/null +++ b/packaging/client/README.md @@ -0,0 +1,72 @@ +# Numa ODoH Client — Docker deploy + +Single-container deploy that runs Numa as an ODoH (RFC 9230) client: every +DNS query routes through an independent relay + target so neither operator +sees both your IP and your question. See the [ODoH integration doc][odoh] +for the full protocol and privacy trade-offs. + +[odoh]: ../../docs/implementation/odoh-integration.md + +## Prerequisites + +- Docker + Docker Compose v2. +- Port 53 (UDP+TCP) free on the host — Numa listens there for DNS + clients on your LAN. + +## Configure + +The shipped `numa.toml` points at Numa's own public relay +(`odoh-relay.numa.rs`) paired with Cloudflare's ODoH target +(`odoh.cloudflare-dns.com`). That's two independent operators with +distinct eTLD+1s — the default configuration passes Numa's same-operator +check and works out of the box. + +To use a different relay or target, edit `numa.toml` and adjust the URLs. +The `relay` and `target` must resolve to distinct operators or Numa +refuses to start. + +## Deploy + +```sh +docker compose up -d +docker compose logs -f numa # watch startup +``` + +The first query fires the bootstrap resolver + ODoH config fetch; +subsequent queries reuse the warm HTTP/2 connection. + +## Point your devices at it + +Set each device's DNS server to the IP of the Docker host. For a LAN-wide +rollout, set the DNS server in your router's DHCP config so every device +picks it up automatically. + +Verify a query landed on the ODoH path: + +```sh +dig @ example.com +curl http://:5380/stats | jq '.upstream_transport.odoh' +``` + +`upstream_transport.odoh` should increment on each query. + +## What this does NOT buy you + +ODoH protects the *path*, not the content: + +- **The target (Cloudflare here) still sees the question.** It just + doesn't know it's you asking. If Cloudflare logs every ODoH query, the + query is still visible — it's simply unattributed. +- **The relay is a trusted party for availability.** A malicious relay + can drop or delay queries; it just can't read them. +- **Traffic analysis defeats small relays.** If you're the only client + talking to a relay, timing alone re-identifies you. Shared, busy relays + give better anonymity sets. + +See the [ODoH integration doc][odoh] for more. + +## Relay operator? + +If you'd rather run your own relay (same binary, different mode), see +[`../relay/`](../relay/) — that package spins up a public-facing relay +with Caddy + ACME in front of it. diff --git a/packaging/client/docker-compose.yml b/packaging/client/docker-compose.yml new file mode 100644 index 0000000..361f5db --- /dev/null +++ b/packaging/client/docker-compose.yml @@ -0,0 +1,15 @@ +services: + numa: + image: ghcr.io/razvandimescu/numa:latest + command: ["/etc/numa/numa.toml"] + ports: + - "53:53/udp" + - "53:53/tcp" + - "5380:5380/tcp" # dashboard + REST API + volumes: + - ./numa.toml:/etc/numa/numa.toml:ro + - numa_data:/var/lib/numa + restart: unless-stopped + +volumes: + numa_data: diff --git a/packaging/client/numa.toml b/packaging/client/numa.toml new file mode 100644 index 0000000..039d723 --- /dev/null +++ b/packaging/client/numa.toml @@ -0,0 +1,23 @@ +# Numa — ODoH client mode (docker-compose starter). +# Sends every DNS query through an independent relay + target pair so +# neither operator sees both your IP and your question. See +# docs/implementation/odoh-integration.md for the protocol details and +# packaging/client/README.md for deploy notes. + +[server] +bind_addr = "0.0.0.0:53" +api_bind_addr = "0.0.0.0" +data_dir = "/var/lib/numa" + +[upstream] +mode = "odoh" +# Numa's own relay (Hetzner, systemd + Caddy). Swap to any other public +# ODoH relay if you'd rather not depend on a single operator; the protocol +# tolerates it, and Numa refuses same-operator relay+target by default. +relay = "https://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +# strict = true (default). Relay failure → SERVFAIL, never silent downgrade. + +[blocking] +enabled = true +# Default blocklist (Hagezi Pro). Edit the `lists` array to taste. -- 2.34.1 From b8a125b598cad7d305c193532b17d1975cf37780 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 22 Apr 2026 23:30:55 +0300 Subject: [PATCH 128/139] fix(upstream): default hedge_ms=0 to avoid silent 2x upstream query count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hedging fires a second upstream query against the same upstream after the hedge delay. Rescues packet loss and handshake stalls on flaky links, but every lookup shows up twice at the provider — silently halves the headroom for anyone on a quota'd upstream (NextDNS free tier, Control D, paid Quad9). Surfaced by #134 (bcookatpcsd), who saw every query duplicated on the NextDNS dashboard with a single-address DoT upstream. Not a bug — the feature doing what it says on the tin — but a surprising default. Flipping the default to 0 makes hedging explicitly opt-in. Users who want tail-latency rescue on flaky nets add `hedge_ms = 10` (or higher). No config migration needed; no breaking changes to the API surface. Also tightens the numa.toml comment so the trade-off is visible at config time, not retroactively on a provider dashboard. --- numa.toml | 12 +++++++----- src/config.rs | 6 +++++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/numa.toml b/numa.toml index 93418ea..baf35aa 100644 --- a/numa.toml +++ b/numa.toml @@ -30,11 +30,13 @@ api_port = 5380 # fallback = ["8.8.8.8", "1.1.1.1"] # tried only when all primaries fail # port = 53 # default port for addresses without :port # timeout_ms = 3000 -# hedge_ms = 10 # request hedging delay (ms). After this delay -# # without a response, fires a parallel request -# # to the same upstream. Rescues packet loss (UDP), -# # dispatch spikes (DoH), TLS stalls (DoT). -# # Set to 0 to disable. Default: 10 +# hedge_ms = 0 # request hedging delay (ms). Default: 0 (off). +# # Set to e.g. 10 to fire a parallel upstream +# # request after 10ms of silence — rescues packet +# # loss (UDP), dispatch spikes (DoH), TLS stalls +# # (DoT). Doubles the upstream query count, so +# # leave off for quota'd providers (NextDNS, +# # Control D). # ODoH (Oblivious DNS-over-HTTPS, RFC 9230). The relay sees your IP but # not the question; the target sees the question but not your IP. Numa diff --git a/src/config.rs b/src/config.rs index 6daf430..f28d647 100644 --- a/src/config.rs +++ b/src/config.rs @@ -451,8 +451,12 @@ fn default_upstream_port() -> u16 { fn default_timeout_ms() -> u64 { 5000 } +/// Off by default: hedging fires a second upstream query, which silently +/// doubles the count at the provider — hurts quota'd DNS (NextDNS, Control +/// D). Opt in with `hedge_ms = 10` for tail-latency rescue on flaky nets +/// or handshake-slow DoT. fn default_hedge_ms() -> u64 { - 10 + 0 } #[derive(Deserialize)] -- 2.34.1 From 3ec3b40830698c21755a54196079e5be027a6cc8 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 22 Apr 2026 23:50:20 +0300 Subject: [PATCH 129/139] chore: bump version to 0.15.0 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1da534a..c1336fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1547,7 +1547,7 @@ dependencies = [ [[package]] name = "numa" -version = "0.14.1" +version = "0.15.0" dependencies = [ "arc-swap", "axum", diff --git a/Cargo.toml b/Cargo.toml index 39f75a2..025b2a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "numa" -version = "0.14.1" +version = "0.15.0" authors = ["razvandimescu "] edition = "2021" description = "Portable DNS resolver in Rust — .numa local domains, ad blocking, developer overrides, DNS-over-HTTPS" -- 2.34.1 From e6e79273b95fc7fa7d59b70ac550c3ad6ba1c82a Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 22 Apr 2026 23:57:28 +0300 Subject: [PATCH 130/139] Revert "chore: bump version to 0.15.0" This reverts commit 3ec3b40830698c21755a54196079e5be027a6cc8. --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c1336fc..1da534a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1547,7 +1547,7 @@ dependencies = [ [[package]] name = "numa" -version = "0.15.0" +version = "0.14.1" dependencies = [ "arc-swap", "axum", diff --git a/Cargo.toml b/Cargo.toml index 025b2a8..39f75a2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "numa" -version = "0.15.0" +version = "0.14.1" authors = ["razvandimescu "] edition = "2021" description = "Portable DNS resolver in Rust — .numa local domains, ad blocking, developer overrides, DNS-over-HTTPS" -- 2.34.1 From c787de15486d703b9395e3113fd52a2bb73e1850 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Wed, 22 Apr 2026 23:57:37 +0300 Subject: [PATCH 131/139] chore: bump version to 0.14.2 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1da534a..9957031 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1547,7 +1547,7 @@ dependencies = [ [[package]] name = "numa" -version = "0.14.1" +version = "0.14.2" dependencies = [ "arc-swap", "axum", diff --git a/Cargo.toml b/Cargo.toml index 39f75a2..01773d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "numa" -version = "0.14.1" +version = "0.14.2" authors = ["razvandimescu "] edition = "2021" description = "Portable DNS resolver in Rust — .numa local domains, ad blocking, developer overrides, DNS-over-HTTPS" -- 2.34.1 From 2274151c17995287291c585bcd38120cd7001174 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 23 Apr 2026 00:35:41 +0300 Subject: [PATCH 132/139] fix(packet): parse SOA natively to stop malformed replies (#128) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SOA records were stored as opaque bytes (DnsRecord::UNKNOWN), so the RFC 1035 §3.3.13 MNAME/RNAME name-compression pointers — offsets into the upstream packet — were re-emitted verbatim. Once Numa applied its own compression to surrounding names, those pointers landed on garbage and clients rejected the reply ("malformed reply packet" in kdig). Parse SOA via read_qname and write via write_qname, matching the NS/CNAME/MX pattern. Adds the canonical-rdata arm in dnssec.rs for RRSIG verification. Regression test round-trips a CNAME-chain response with a compressed SOA in authority through hickory-proto strict parse. --- src/dnssec.rs | 22 +++++++ src/record.rs | 70 ++++++++++++++++++++- tests/soa_compression_bug.rs | 115 +++++++++++++++++++++++++++++++++++ 3 files changed, 206 insertions(+), 1 deletion(-) create mode 100644 tests/soa_compression_bug.rs diff --git a/src/dnssec.rs b/src/dnssec.rs index 8614810..877b495 100644 --- a/src/dnssec.rs +++ b/src/dnssec.rs @@ -882,6 +882,28 @@ fn record_rdata_canonical(record: &DnsRecord) -> Vec { rdata.extend(type_bitmap); rdata } + DnsRecord::SOA { + mname, + rname, + serial, + refresh, + retry, + expire, + minimum, + .. + } => { + let mname_wire = name_to_wire(mname); + let rname_wire = name_to_wire(rname); + let mut rdata = Vec::with_capacity(mname_wire.len() + rname_wire.len() + 20); + rdata.extend(&mname_wire); + rdata.extend(&rname_wire); + rdata.extend(&serial.to_be_bytes()); + rdata.extend(&refresh.to_be_bytes()); + rdata.extend(&retry.to_be_bytes()); + rdata.extend(&expire.to_be_bytes()); + rdata.extend(&minimum.to_be_bytes()); + rdata + } DnsRecord::UNKNOWN { data, .. } => data.clone(), DnsRecord::RRSIG { .. } => Vec::new(), } diff --git a/src/record.rs b/src/record.rs index 7de9bb4..0fefd72 100644 --- a/src/record.rs +++ b/src/record.rs @@ -24,6 +24,17 @@ pub enum DnsRecord { host: String, ttl: u32, }, + SOA { + domain: String, + mname: String, + rname: String, + serial: u32, + refresh: u32, + retry: u32, + expire: u32, + minimum: u32, + ttl: u32, + }, CNAME { domain: String, host: String, @@ -100,6 +111,7 @@ impl DnsRecord { | DnsRecord::RRSIG { domain, .. } | DnsRecord::NSEC { domain, .. } | DnsRecord::NSEC3 { domain, .. } + | DnsRecord::SOA { domain, .. } | DnsRecord::UNKNOWN { domain, .. } => domain, } } @@ -111,6 +123,7 @@ impl DnsRecord { DnsRecord::NS { .. } => QueryType::NS, DnsRecord::CNAME { .. } => QueryType::CNAME, DnsRecord::MX { .. } => QueryType::MX, + DnsRecord::SOA { .. } => QueryType::SOA, DnsRecord::DNSKEY { .. } => QueryType::DNSKEY, DnsRecord::DS { .. } => QueryType::DS, DnsRecord::RRSIG { .. } => QueryType::RRSIG, @@ -132,6 +145,7 @@ impl DnsRecord { | DnsRecord::RRSIG { ttl, .. } | DnsRecord::NSEC { ttl, .. } | DnsRecord::NSEC3 { ttl, .. } + | DnsRecord::SOA { ttl, .. } | DnsRecord::UNKNOWN { ttl, .. } => *ttl, } } @@ -172,6 +186,12 @@ impl DnsRecord { + next_hashed_owner.capacity() + type_bitmap.capacity() } + DnsRecord::SOA { + domain, + mname, + rname, + .. + } => domain.capacity() + mname.capacity() + rname.capacity(), DnsRecord::UNKNOWN { domain, data, .. } => domain.capacity() + data.capacity(), } } @@ -188,6 +208,7 @@ impl DnsRecord { | DnsRecord::RRSIG { ttl, .. } | DnsRecord::NSEC { ttl, .. } | DnsRecord::NSEC3 { ttl, .. } + | DnsRecord::SOA { ttl, .. } | DnsRecord::UNKNOWN { ttl, .. } => *ttl = new_ttl, } } @@ -365,8 +386,31 @@ impl DnsRecord { ttl, }) } + QueryType::SOA => { + // MNAME/RNAME compressible per RFC 1035 §3.3.13 — decompress to avoid stale pointers on re-emit. + let mut mname = String::with_capacity(64); + buffer.read_qname(&mut mname)?; + let mut rname = String::with_capacity(64); + buffer.read_qname(&mut rname)?; + let serial = buffer.read_u32()?; + let refresh = buffer.read_u32()?; + let retry = buffer.read_u32()?; + let expire = buffer.read_u32()?; + let minimum = buffer.read_u32()?; + Ok(DnsRecord::SOA { + domain, + mname, + rname, + serial, + refresh, + retry, + expire, + minimum, + ttl, + }) + } _ => { - // SOA, TXT, SRV, etc. — stored as opaque bytes until parsed natively + // TXT, SRV, HTTPS, SVCB, etc. — stored as opaque bytes until parsed natively let data = buffer.get_range(buffer.pos(), data_len as usize)?.to_vec(); buffer.step(data_len as usize)?; Ok(DnsRecord::UNKNOWN { @@ -430,6 +474,30 @@ impl DnsRecord { let size = buffer.pos() - (pos + 2); buffer.set_u16(pos, size as u16)?; } + DnsRecord::SOA { + ref domain, + ref mname, + ref rname, + serial, + refresh, + retry, + expire, + minimum, + ttl, + } => { + write_header(buffer, domain, QueryType::SOA.to_num(), ttl)?; + let rdlen_pos = buffer.pos(); + buffer.write_u16(0)?; + buffer.write_qname(mname)?; + buffer.write_qname(rname)?; + buffer.write_u32(serial)?; + buffer.write_u32(refresh)?; + buffer.write_u32(retry)?; + buffer.write_u32(expire)?; + buffer.write_u32(minimum)?; + let rdlen = buffer.pos() - (rdlen_pos + 2); + buffer.set_u16(rdlen_pos, rdlen as u16)?; + } DnsRecord::AAAA { ref domain, ref addr, diff --git a/tests/soa_compression_bug.rs b/tests/soa_compression_bug.rs new file mode 100644 index 0000000..5f4f2f0 --- /dev/null +++ b/tests/soa_compression_bug.rs @@ -0,0 +1,115 @@ +//! Regression test for issue #128: SOA with compressed MNAME/RNAME must +//! survive Numa's round-trip — compression pointers reference the upstream +//! packet's byte layout, so we have to decompress on read and re-compress +//! on write. + +use numa::buffer::BytePacketBuffer; +use numa::packet::DnsPacket; + +const COMPRESSION_FLAG: u16 = 0xC000; + +fn upstream_packet() -> Vec { + let mut p = Vec::::new(); + + p.extend_from_slice(&[ + 0x12, 0x34, 0x81, 0x80, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, + ]); + + assert_eq!(p.len(), 12); + write_name(&mut p, &["odin", "adobe", "com"]); + p.extend_from_slice(&[0x00, 0x41, 0x00, 0x01]); + + p.extend_from_slice(&[0xC0, 0x0C]); + p.extend_from_slice(&[0x00, 0x05, 0x00, 0x01, 0x00, 0x00, 0x23, 0x7F]); + let rdlen_pos_1 = p.len(); + p.extend_from_slice(&[0x00, 0x00]); + let cname1_start = p.len(); + write_name(&mut p, &["cdn", "adobeaemcloud", "com"]); + let rdlen_1 = (p.len() - cname1_start) as u16; + p[rdlen_pos_1..rdlen_pos_1 + 2].copy_from_slice(&rdlen_1.to_be_bytes()); + + p.extend_from_slice(&(COMPRESSION_FLAG | cname1_start as u16).to_be_bytes()); + p.extend_from_slice(&[0x00, 0x05, 0x00, 0x01, 0x00, 0x00, 0x23, 0x7F]); + let rdlen_pos_2 = p.len(); + p.extend_from_slice(&[0x00, 0x00]); + let cname2_start = p.len(); + p.push(9); + p.extend_from_slice(b"adobe-aem"); + let map_label_off = p.len(); + p.push(3); + p.extend_from_slice(b"map"); + let fastly_label_off = p.len(); + p.push(6); + p.extend_from_slice(b"fastly"); + p.push(3); + p.extend_from_slice(b"net"); + p.push(0); + let rdlen_2 = (p.len() - cname2_start) as u16; + p[rdlen_pos_2..rdlen_pos_2 + 2].copy_from_slice(&rdlen_2.to_be_bytes()); + + p.extend_from_slice(&(COMPRESSION_FLAG | fastly_label_off as u16).to_be_bytes()); + p.extend_from_slice(&[0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x07, 0x08]); + let rdlen_pos_soa = p.len(); + p.extend_from_slice(&[0x00, 0x00]); + let soa_rdata_start = p.len(); + p.extend_from_slice(&(COMPRESSION_FLAG | map_label_off as u16).to_be_bytes()); + p.extend_from_slice(&(COMPRESSION_FLAG | fastly_label_off as u16).to_be_bytes()); + p.extend_from_slice(&1u32.to_be_bytes()); + p.extend_from_slice(&7200u32.to_be_bytes()); + p.extend_from_slice(&3600u32.to_be_bytes()); + p.extend_from_slice(&1209600u32.to_be_bytes()); + p.extend_from_slice(&1800u32.to_be_bytes()); + let rdlen_soa = (p.len() - soa_rdata_start) as u16; + p[rdlen_pos_soa..rdlen_pos_soa + 2].copy_from_slice(&rdlen_soa.to_be_bytes()); + + p +} + +fn write_name(p: &mut Vec, labels: &[&str]) { + for l in labels { + p.push(l.len() as u8); + p.extend_from_slice(l.as_bytes()); + } + p.push(0); +} + +#[test] +fn compressed_soa_survives_numa_round_trip() { + let upstream = upstream_packet(); + + let hickory_in = hickory_proto::op::Message::from_vec(&upstream) + .expect("hand-crafted upstream must be valid"); + let soa_in_rd = hickory_in.name_servers()[0] + .data() + .clone() + .into_soa() + .expect("SOA rdata"); + assert_eq!(soa_in_rd.mname().to_string(), "map.fastly.net."); + assert_eq!(soa_in_rd.rname().to_string(), "fastly.net."); + + let mut in_buf = BytePacketBuffer::from_bytes(&upstream); + let pkt = DnsPacket::from_buffer(&mut in_buf).expect("numa parses upstream"); + assert_eq!(pkt.answers.len(), 2); + assert_eq!(pkt.authorities.len(), 1); + + let mut out_buf = BytePacketBuffer::new(); + pkt.write(&mut out_buf).expect("numa writes"); + let out = out_buf.filled().to_vec(); + + let hickory_out = + hickory_proto::op::Message::from_vec(&out).expect("numa re-emission must parse strictly"); + + let soa_out_rd = hickory_out.name_servers()[0] + .data() + .clone() + .into_soa() + .expect("SOA rdata on output"); + + assert_eq!(soa_out_rd.mname().to_string(), "map.fastly.net."); + assert_eq!(soa_out_rd.rname().to_string(), "fastly.net."); + assert_eq!(soa_out_rd.serial(), 1); + assert_eq!(soa_out_rd.refresh(), 7200); + assert_eq!(soa_out_rd.retry(), 3600); + assert_eq!(soa_out_rd.expire(), 1209600); + assert_eq!(soa_out_rd.minimum(), 1800); +} -- 2.34.1 From 96cf778beafd0776415a5b782de186d3bd18b43e Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 23 Apr 2026 08:53:35 +0300 Subject: [PATCH 133/139] docs(config): fix ODoH relay path in numa.toml example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The example in `numa.toml` pointed at `https://odoh-relay.numa.rs/proxy`, but the relay only serves the ODoH endpoint at `/relay` (every other reference in the tree — `src/config.rs` docs and tests, and `packaging/client/numa.toml` — uses `/relay`). Users who copied the example got `404 Not Found` on every query and SERVFAIL at the client. Reported in #138. --- numa.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numa.toml b/numa.toml index baf35aa..2138dd2 100644 --- a/numa.toml +++ b/numa.toml @@ -43,7 +43,7 @@ api_port = 5380 # refuses same-operator relay+target configs by default (eTLD+1 check). # [upstream] # mode = "odoh" -# relay = "https://odoh-relay.numa.rs/proxy" +# relay = "https://odoh-relay.numa.rs/relay" # target = "https://odoh.cloudflare-dns.com/dns-query" # strict = true # default: refuse to downgrade to `fallback` # # on relay failure. Set false to allow a -- 2.34.1 From e702f5861b27088a68ce8b75d4f086516cb4c858 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Thu, 23 Apr 2026 09:39:34 +0300 Subject: [PATCH 134/139] Update README.md to remove outdated listing information Removed section about listing on the public ecosystem and DNSCrypt's canonical list. --- packaging/relay/README.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/packaging/relay/README.md b/packaging/relay/README.md index 373b263..b86e284 100644 --- a/packaging/relay/README.md +++ b/packaging/relay/README.md @@ -39,10 +39,3 @@ curl https:///health Then point any ODoH client at `https:///relay` and watch the counters tick. - -## Listing on the public ecosystem - -DNSCrypt's [v3/odoh-relays.md](https://github.com/DNSCrypt/dnscrypt-resolvers/blob/master/v3/odoh-relays.md) -is the canonical list. The pruned 2025-09-16 commit shows one public ODoH -relay survived the cull — running this compose file doubles global supply. -Open a PR there once your relay has been up for ~24 hours. -- 2.34.1 From f7f35b34241769dd817eb18ef80da84924d53610 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Fri, 24 Apr 2026 15:09:16 +0300 Subject: [PATCH 135/139] docs: lift user-facing guides to recipes/, drop dangling docs/ refs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit docs/ is gitignored; references to docs/implementation/*.md from public source, configs, and packaging were dead links outside the maintainer machine. Adds four recipes (README, dnsdist-front, doh-on-lan, odoh-upstream) under top-level recipes/ and repoints existing pointers. - numa.toml, packaging/client/{README.md,numa.toml}: point to recipes/odoh-upstream.md. - src/{bootstrap_resolver,forward,serve}.rs: reference issue #122 directly (module scope is broader than the ODoH-specific recipe). - src/health.rs: drop the §-ref; iOS HealthInfo remains named as the canonical consumer. --- numa.toml | 4 +-- packaging/client/README.md | 6 ++-- packaging/client/numa.toml | 2 +- recipes/README.md | 11 +++++++ recipes/dnsdist-front.md | 64 ++++++++++++++++++++++++++++++++++++++ recipes/doh-on-lan.md | 61 ++++++++++++++++++++++++++++++++++++ recipes/odoh-upstream.md | 59 +++++++++++++++++++++++++++++++++++ src/bootstrap_resolver.rs | 3 +- src/forward.rs | 3 +- src/health.rs | 9 +++--- src/serve.rs | 1 - 11 files changed, 207 insertions(+), 16 deletions(-) create mode 100644 recipes/README.md create mode 100644 recipes/dnsdist-front.md create mode 100644 recipes/doh-on-lan.md create mode 100644 recipes/odoh-upstream.md diff --git a/numa.toml b/numa.toml index 2138dd2..57d0249 100644 --- a/numa.toml +++ b/numa.toml @@ -51,8 +51,8 @@ api_port = 5380 # relay_ip = "178.104.229.30" # optional: pin IPs so numa doesn't leak the # target_ip = "104.16.249.249" # relay/target hostnames via the bootstrap # # resolver on cold boot when numa is its -# # own system DNS. See docs/implementation/ -# # bootstrap-resolver.md. +# # own system DNS. See +# # recipes/odoh-upstream.md. # root_hints = [ # only used in recursive mode # "198.41.0.4", # a.root-servers.net (Verisign) # "199.9.14.201", # b.root-servers.net (USC-ISI) diff --git a/packaging/client/README.md b/packaging/client/README.md index f6e76c0..f66359f 100644 --- a/packaging/client/README.md +++ b/packaging/client/README.md @@ -2,10 +2,10 @@ Single-container deploy that runs Numa as an ODoH (RFC 9230) client: every DNS query routes through an independent relay + target so neither operator -sees both your IP and your question. See the [ODoH integration doc][odoh] -for the full protocol and privacy trade-offs. +sees both your IP and your question. See the [ODoH upstream recipe][odoh] +for the protocol details and the bootstrap-pinning trade-offs. -[odoh]: ../../docs/implementation/odoh-integration.md +[odoh]: ../../recipes/odoh-upstream.md ## Prerequisites diff --git a/packaging/client/numa.toml b/packaging/client/numa.toml index 039d723..64b9268 100644 --- a/packaging/client/numa.toml +++ b/packaging/client/numa.toml @@ -1,7 +1,7 @@ # Numa — ODoH client mode (docker-compose starter). # Sends every DNS query through an independent relay + target pair so # neither operator sees both your IP and your question. See -# docs/implementation/odoh-integration.md for the protocol details and +# recipes/odoh-upstream.md for the protocol details and # packaging/client/README.md for deploy notes. [server] diff --git a/recipes/README.md b/recipes/README.md new file mode 100644 index 0000000..fa05c2d --- /dev/null +++ b/recipes/README.md @@ -0,0 +1,11 @@ +# Recipes + +Scenario-driven configs for common Numa deployments. Each recipe is self-contained: copy the snippet, adjust the marked fields, reload. + +## Transport / encryption + +- [DoH on the LAN](doh-on-lan.md) — expose Numa's built-in DNS-over-HTTPS to local clients. +- [dnsdist in front of Numa](dnsdist-front.md) — terminate public TLS externally, keep Numa on loopback. +- [ODoH upstream with bootstrap pinning](odoh-upstream.md) — oblivious DNS client mode without leaking the relay/target hostnames. + +Missing a scenario? Open an issue or PR — these are plain Markdown with no build step. diff --git a/recipes/dnsdist-front.md b/recipes/dnsdist-front.md new file mode 100644 index 0000000..310b53c --- /dev/null +++ b/recipes/dnsdist-front.md @@ -0,0 +1,64 @@ +# dnsdist in front of Numa + +For public DoH with a real (ACME-signed) cert, terminate TLS outside Numa and forward plain DNS (or loopback-only DoH) to the resolver. Cert renewal, rate-limiting, and load-balancing live in the front-end; Numa stays focused on resolution. + +## When to use this + +- Public hostname (`dns.example.com`) with a Let's Encrypt or internal PKI cert. +- You want a dedicated front-end for DoH/DoT/DoQ while Numa stays loopback-bound. +- You plan to run multiple Numa instances behind one endpoint. + +## Architecture + +``` + public 443/DoH ┐ + public 853/DoT ├─► dnsdist ─► 127.0.0.1:53 (Numa UDP/TCP) + public 443/DoQ ┘ +``` + +## dnsdist config + +```lua +-- /etc/dnsdist/dnsdist.conf + +newServer({address="127.0.0.1:53", name="numa", checkType="A", checkName="numa.rs."}) + +addDOHLocal( + "0.0.0.0:443", + "/etc/letsencrypt/live/dns.example.com/fullchain.pem", + "/etc/letsencrypt/live/dns.example.com/privkey.pem", + "/dns-query", + {doTCP=true, reusePort=true} +) + +addTLSLocal( + "0.0.0.0:853", + "/etc/letsencrypt/live/dns.example.com/fullchain.pem", + "/etc/letsencrypt/live/dns.example.com/privkey.pem" +) + +addAction(AllRule(), PoolAction("", false)) +``` + +## Numa config + +```toml +[proxy] +enabled = true # keep if you still use *.numa service routing +bind_addr = "127.0.0.1" # stays default +``` + +No changes to `[server]` — Numa keeps serving plain DNS on UDP/TCP 53, which dnsdist forwards. + +## Caveat: client IPs + +Without PROXY protocol support in Numa, the query log shows the front-end's IP on every query, not the real client. dnsdist can emit PROXY v2 (`useProxyProtocol=true` on `newServer`), but Numa doesn't yet parse it — tracked in the wish-list under #143. Until then, accept the blind spot or correlate against dnsdist's own logs. + +## Verify + +```bash +kdig +https @dns.example.com example.com +kdig +tls @dns.example.com example.com +``` + +Both should return clean answers. Numa's `/queries` API should show the request landing, sourced from the front-end IP. diff --git a/recipes/doh-on-lan.md b/recipes/doh-on-lan.md new file mode 100644 index 0000000..70b607e --- /dev/null +++ b/recipes/doh-on-lan.md @@ -0,0 +1,61 @@ +# DoH on the LAN + +Numa ships an RFC 8484 DoH endpoint (`POST /dns-query`) on the `[proxy]` HTTPS listener. By default it binds `127.0.0.1:443` with a self-signed cert — invisible to anything off the box. Three changes make it reachable from the LAN. + +## When to use this + +- Your phone/laptop is on the same network as Numa and you want encrypted DNS without a cloud resolver. +- You're OK installing Numa's self-signed CA on every client (one-time, via `/ca.pem` + the mobileconfig flow). + +For a publicly-trusted cert, see [dnsdist in front of Numa](dnsdist-front.md) instead. + +## Minimal config + +```toml +[proxy] +enabled = true # default +bind_addr = "0.0.0.0" # was 127.0.0.1 — expose to LAN +tls_port = 443 # default; DoH is served here +tld = "numa" # default — self-resolving, see below +``` + +`tld` is the DoH gate: Numa accepts the DoH request only when the `Host` header is loopback or equals (or is a subdomain of) `tld`. Clients therefore dial `https://numa/dns-query`. + +With the default `tld = "numa"`, there's no DNS bootstrap to configure: Numa already resolves `numa` and `*.numa` to its own LAN IP for remote clients (that's how the `*.numa` service-proxy feature works). Any client that uses Numa as its resolver will resolve `numa` correctly on first try. + +If you'd rather use a hostname that resolves via normal DNS (e.g. you want DoH-only clients that never talk plain DNS to Numa), set `tld = "dns.example.com"` and add a matching A record in whichever DNS your clients consult before reaching Numa. + +## Trust the CA on each client + +Numa generates a self-signed CA at startup. Fetch it once, import it wherever you'll run the DoH client: + +```bash +curl -o numa-ca.pem http://:5380/ca.pem +``` + +- **macOS** — `sudo security add-trusted-cert -d -r trustRoot -k /Library/Keychains/System.keychain numa-ca.pem` +- **iOS** — install the mobileconfig from the API (same CA, signed profile). Flip *Settings → General → About → Certificate Trust Settings* on after install. +- **Linux** — drop into `/usr/local/share/ca-certificates/` and run `sudo update-ca-certificates`. +- **Android** — requires the user-installed CA path; browsers may still refuse it for DoH. Consider the [dnsdist front](dnsdist-front.md) route instead. + +## Verify + +```bash +kdig +https @numa example.com +``` + +Without `+https` kdig uses plain DNS. With `+https` the same answers should flow over port 443. + +Raw check: + +```bash +curl -H 'accept: application/dns-message' \ + --data-binary @query.bin \ + https://numa/dns-query +``` + +## Gotchas + +- Port 443 is privileged on Linux/macOS. Run Numa via the provided service units, or grant `CAP_NET_BIND_SERVICE` (`sudo setcap 'cap_net_bind_service=+ep' /path/to/numa`). +- Non-matching `Host` header → HTTP 404 from the proxy's fallback handler. Double-check `tld`. +- ChromeOS enrollment rejects user-installed CAs for some flows — known pain point, see issue #136. diff --git a/recipes/odoh-upstream.md b/recipes/odoh-upstream.md new file mode 100644 index 0000000..0469bca --- /dev/null +++ b/recipes/odoh-upstream.md @@ -0,0 +1,59 @@ +# ODoH upstream with bootstrap pinning + +Numa can run as an Oblivious DoH (RFC 9230) client: the relay sees your IP but not the question, the target sees the question but not your IP. Neither party alone can re-identify a query. This recipe covers the minimal config and the bootstrap leak that `relay_ip` / `target_ip` close. + +## When to use this + +- You want split-trust encrypted DNS without a single provider seeing both who you are and what you asked. +- Numa is your system resolver (so there's no "other" DNS to ask). + +## Minimal config + +```toml +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +strict = true # refuse to fall back to a non-oblivious path on relay failure +``` + +`strict = true` means a relay-level HTTPS failure returns SERVFAIL instead of silently downgrading. Set it to `false` and configure `[upstream].fallback` if you'd rather keep resolving (at the cost of the oblivious property). + +## The bootstrap leak + +When Numa is the system resolver and needs to reach the relay/target, *something* has to translate `odoh-relay.numa.rs` → IP. If Numa asks itself, you deadlock. If Numa asks a bootstrap resolver (1.1.1.1, 9.9.9.9), that resolver learns which ODoH endpoint you use in cleartext — it can't see your questions, but it sees the destination. That's the leak ODoH was supposed to close. + +`relay_ip` and `target_ip` tell Numa the IPs directly, so it never asks anyone: + +```toml +[upstream] +mode = "odoh" +relay = "https://odoh-relay.numa.rs/relay" +target = "https://odoh.cloudflare-dns.com/dns-query" +relay_ip = "178.104.229.30" # pin the relay — no hostname lookup +target_ip = "104.16.249.249" # pin the target — no hostname lookup +``` + +Numa still validates TLS against the hostnames in `relay` / `target`, so a hijacked IP can't masquerade — pinning skips only the DNS step. + +## Finding current IPs + +```bash +dig +short odoh-relay.numa.rs +dig +short odoh.cloudflare-dns.com +``` + +Re-pin when an operator rotates. The community-maintained list at is a useful cross-reference. + +## Verify + +```bash +kdig @127.0.0.1 example.com +``` + +Numa's `/queries` API and startup banner should label the upstream as `odoh://`. Look for `ODoH relay returned ...` errors in the logs if routing fails. + +## Known gotchas + +- **Same-operator refused.** Numa's eTLD+1 check blocks configs where the relay and target belong to the same operator (pointless — same party sees both sides). Override only when testing. +- **Single relay.** Current config accepts one relay and one target. Multi-entry rotation/failover is tracked in #140. diff --git a/src/bootstrap_resolver.rs b/src/bootstrap_resolver.rs index c3be8bd..44214e4 100644 --- a/src/bootstrap_resolver.rs +++ b/src/bootstrap_resolver.rs @@ -2,8 +2,7 @@ //! relay/target, blocklist CDN). When numa is its own system resolver //! (`/etc/resolv.conf → 127.0.0.1`, HAOS add-on, Pi-hole-style container), //! the default `getaddrinfo` path loops back through numa before numa can -//! answer — a chicken-and-egg that deadlocks cold boot. See issue #122 and -//! `docs/implementation/bootstrap-resolver.md`. +//! answer — a chicken-and-egg that deadlocks cold boot. See issue #122. //! //! Resolution order per hostname: //! 1. Per-hostname overrides (e.g. ODoH `relay_ip` / `target_ip`) → return diff --git a/src/forward.rs b/src/forward.rs index e3f307b..1c39292 100644 --- a/src/forward.rs +++ b/src/forward.rs @@ -175,8 +175,7 @@ pub fn parse_upstream( /// /// Uses the system resolver. Callers running inside `serve::run` pass the /// shared [`crate::bootstrap_resolver::NumaResolver`] via -/// [`build_https_client_with_resolver`] to avoid the self-loop documented -/// in `docs/implementation/bootstrap-resolver.md`. +/// [`build_https_client_with_resolver`] to avoid the self-loop (issue #122). pub fn build_https_client() -> reqwest::Client { build_https_client_with_resolver(1, None) } diff --git a/src/health.rs b/src/health.rs index 5767f4b..30cad9a 100644 --- a/src/health.rs +++ b/src/health.rs @@ -7,11 +7,10 @@ //! Both handlers call [`HealthResponse::build`] to assemble the JSON //! response from `HealthMeta` + live inputs. //! -//! JSON schema is documented in `docs/implementation/ios-companion-app.md` -//! §4.2. The iOS companion app's `HealthInfo` struct is the canonical -//! consumer; any change to this response must keep that struct decoding -//! cleanly (all consumed fields are optional on the Swift side, but -//! `lan_ip` is load-bearing for the pipeline). +//! The iOS companion app's `HealthInfo` struct is the canonical consumer; +//! any change to this response must keep that struct decoding cleanly (all +//! consumed fields are optional on the Swift side, but `lan_ip` is +//! load-bearing for the pipeline). use std::net::Ipv4Addr; use std::path::Path; diff --git a/src/serve.rs b/src/serve.rs index c76d174..e20ebe8 100644 --- a/src/serve.rs +++ b/src/serve.rs @@ -52,7 +52,6 @@ pub async fn run(config_path: String) -> crate::Result<()> { // Routes numa-originated HTTPS (DoH upstream, ODoH relay/target, blocklist // CDN) away from the system resolver so lookups don't loop back through // numa when it's its own system DNS. - // See `docs/implementation/bootstrap-resolver.md`. let resolver_overrides = match config.upstream.mode { crate::config::UpstreamMode::Odoh => config .upstream -- 2.34.1 From 4aa91a52369670c1c8010a1c577684257367e684 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Fri, 24 Apr 2026 17:51:14 +0300 Subject: [PATCH 136/139] fix(api): Cache-Control: no-cache on dashboard HTML Browsers heuristically cached the dashboard page because the response carried no Cache-Control header, so a numa upgrade on the daemon did not surface updated PATH_DEFS (e.g. the UPSTREAM row added in v0.14.0) until the user hard-reloaded. Force revalidation on every load. Closes #144. --- src/api.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/api.rs b/src/api.rs index 7f02920..eb31ef1 100644 --- a/src/api.rs +++ b/src/api.rs @@ -83,8 +83,13 @@ pub fn router(ctx: Arc) -> Router { } async fn dashboard() -> impl IntoResponse { + // Revalidate each load so browsers don't keep serving a stale + // dashboard across numa upgrades. ( - [(header::CONTENT_TYPE, "text/html; charset=utf-8")], + [ + (header::CONTENT_TYPE, "text/html; charset=utf-8"), + (header::CACHE_CONTROL, "no-cache"), + ], DASHBOARD_HTML, ) } @@ -1244,6 +1249,13 @@ mod tests { .await .unwrap(); assert_eq!(resp.status(), 200); + assert_eq!( + resp.headers() + .get(header::CACHE_CONTROL) + .map(|v| v.to_str().unwrap()), + Some("no-cache"), + "dashboard must revalidate to avoid stale HTML across upgrades" + ); let body = axum::body::to_bytes(resp.into_body(), 100000) .await .unwrap(); -- 2.34.1 From d090e049ec9d535a96425e26189c9ef1efdf46d2 Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Fri, 24 Apr 2026 17:57:51 +0300 Subject: [PATCH 137/139] ci(aur): attach to master after clone to avoid detached HEAD aur.archlinux.org stopped advertising the HEAD symref around 2026-04-22 (`git ls-remote --symref` returns HEAD as a raw SHA, no 'ref:' line). Fresh clones therefore land in detached HEAD, commits do not land on any branch, and 'git push origin master' fails with: error: src refspec master does not match any Every AUR publish run since has failed for this reason. Checking out master explicitly after clone attaches the working copy to the branch the push targets. refs/heads/master is still present on the remote, so no other changes are needed. --- .github/workflows/publish-aur.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/publish-aur.yml b/.github/workflows/publish-aur.yml index 6bd77e7..5737c21 100644 --- a/.github/workflows/publish-aur.yml +++ b/.github/workflows/publish-aur.yml @@ -126,6 +126,10 @@ jobs: # ssh://aur@aur.archlinux.org/.git git clone ssh://aur@aur.archlinux.org/$AUR_PKGNAME.git aur-repo + # AUR's git server no longer advertises HEAD's symref, so clone + # lands in detached HEAD. Attach to master before committing. + git -C aur-repo checkout master + cp PKGBUILD aur-repo/ cd aur-repo -- 2.34.1 From cfef4f4160f8e7d34b03f5cd7a608a140de6d95c Mon Sep 17 00:00:00 2001 From: Razvan Dimescu Date: Fri, 24 Apr 2026 19:03:02 +0300 Subject: [PATCH 138/139] fix(cache): refresh honors forwarding rules (#147) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit refresh_entry unconditionally queried the default upstream, so any domain covered by a forwarding rule got re-resolved through the public resolver once its cache entry hit NearExpiry or Stale. The resulting NXDOMAIN/NODATA overwrote the good answer for at least cache.min_ttl (60s default), persisting until restart. Match the precedence from resolve_query: forwarding rule wins over recursive/default upstream. Extract a_record_response() helper in testutil and migrate six call sites — two regression tests here plus four adjacent tests using the same boilerplate. --- src/ctx.rs | 130 ++++++++++++++++++++++++++++++++++++------------ src/testutil.rs | 16 ++++++ 2 files changed, 114 insertions(+), 32 deletions(-) diff --git a/src/ctx.rs b/src/ctx.rs index 0d39f7d..d4741ec 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -408,6 +408,33 @@ fn cache_and_parse( /// Used for both stale-entry refresh and proactive cache warming. pub async fn refresh_entry(ctx: &ServerCtx, qname: &str, qtype: QueryType) { let query = DnsPacket::query(0, qname, qtype); + + // Forwarding rules must win here, mirroring `resolve_query` — otherwise + // refresh re-resolves private zones through the default upstream and + // poisons the cache with NXDOMAIN. + if let Some(pool) = crate::system_dns::match_forwarding_rule(qname, &ctx.forwarding_rules) { + let mut buf = BytePacketBuffer::new(); + if query.write(&mut buf).is_ok() { + if let Ok(wire) = forward_with_failover_raw( + buf.filled(), + pool, + &ctx.srtt, + ctx.timeout, + ctx.hedge_delay, + ) + .await + { + ctx.cache.write().unwrap().insert_wire( + qname, + qtype, + &wire, + DnssecStatus::Indeterminate, + ); + } + } + return; + } + if ctx.upstream_mode == UpstreamMode::Recursive { if let Ok(resp) = crate::recursive::resolve_recursive( qname, @@ -1244,14 +1271,8 @@ mod tests { #[tokio::test] async fn pipeline_filter_aaaa_leaves_a_queries_alone() { - let mut upstream_resp = DnsPacket::new(); - upstream_resp.header.response = true; - upstream_resp.header.rescode = ResultCode::NOERROR; - upstream_resp.answers.push(DnsRecord::A { - domain: "example.com".to_string(), - addr: Ipv4Addr::new(93, 184, 216, 34), - ttl: 300, - }); + let upstream_resp = + crate::testutil::a_record_response("example.com", Ipv4Addr::new(93, 184, 216, 34), 300); let upstream_addr = crate::testutil::mock_upstream(upstream_resp).await; let mut ctx = crate::testutil::test_ctx().await; @@ -1471,14 +1492,8 @@ mod tests { #[tokio::test] async fn pipeline_forwarding_returns_upstream_answer() { - let mut upstream_resp = DnsPacket::new(); - upstream_resp.header.response = true; - upstream_resp.header.rescode = ResultCode::NOERROR; - upstream_resp.answers.push(DnsRecord::A { - domain: "internal.corp".to_string(), - addr: Ipv4Addr::new(10, 1, 2, 3), - ttl: 600, - }); + let upstream_resp = + crate::testutil::a_record_response("internal.corp", Ipv4Addr::new(10, 1, 2, 3), 600); let upstream_addr = crate::testutil::mock_upstream(upstream_resp).await; let mut ctx = crate::testutil::test_ctx().await; @@ -1505,14 +1520,8 @@ mod tests { async fn pipeline_forwarding_fails_over_to_second_upstream() { let dead = crate::testutil::blackhole_upstream(); - let mut live_resp = DnsPacket::new(); - live_resp.header.response = true; - live_resp.header.rescode = ResultCode::NOERROR; - live_resp.answers.push(DnsRecord::A { - domain: "internal.corp".to_string(), - addr: Ipv4Addr::new(10, 9, 9, 9), - ttl: 600, - }); + let live_resp = + crate::testutil::a_record_response("internal.corp", Ipv4Addr::new(10, 9, 9, 9), 600); let live = crate::testutil::mock_upstream(live_resp).await; let mut ctx = crate::testutil::test_ctx().await; @@ -1534,14 +1543,8 @@ mod tests { #[tokio::test] async fn pipeline_default_pool_reports_upstream_path() { - let mut upstream_resp = DnsPacket::new(); - upstream_resp.header.response = true; - upstream_resp.header.rescode = ResultCode::NOERROR; - upstream_resp.answers.push(DnsRecord::A { - domain: "example.com".to_string(), - addr: Ipv4Addr::new(93, 184, 216, 34), - ttl: 300, - }); + let upstream_resp = + crate::testutil::a_record_response("example.com", Ipv4Addr::new(93, 184, 216, 34), 300); let upstream_addr = crate::testutil::mock_upstream(upstream_resp).await; let ctx = crate::testutil::test_ctx().await; @@ -1556,4 +1559,67 @@ mod tests { assert_eq!(resp.header.rescode, ResultCode::NOERROR); assert_eq!(resp.answers.len(), 1); } + + #[tokio::test] + async fn refresh_entry_honors_forwarding_rule() { + let rule_resp = + crate::testutil::a_record_response("internal.corp", Ipv4Addr::new(10, 0, 0, 42), 300); + let rule_upstream = crate::testutil::mock_upstream(rule_resp).await; + + let mut ctx = crate::testutil::test_ctx().await; + ctx.forwarding_rules = vec![ForwardingRule::new( + "corp".to_string(), + UpstreamPool::new(vec![Upstream::Udp(rule_upstream)], vec![]), + )]; + // Default pool points at a blackhole — if the refresh queries it + // instead of the rule, the test fails because nothing is cached. + ctx.upstream_pool + .lock() + .unwrap() + .set_primary(vec![Upstream::Udp(crate::testutil::blackhole_upstream())]); + let ctx = Arc::new(ctx); + + refresh_entry(&ctx, "internal.corp", QueryType::A).await; + + let cached = ctx + .cache + .read() + .unwrap() + .lookup("internal.corp", QueryType::A) + .expect("refresh must populate cache via forwarding rule"); + match &cached.answers[0] { + DnsRecord::A { addr, .. } => assert_eq!(*addr, Ipv4Addr::new(10, 0, 0, 42)), + other => panic!("expected A record, got {:?}", other), + } + } + + #[tokio::test] + async fn refresh_entry_prefers_forwarding_rule_over_recursive() { + let rule_resp = + crate::testutil::a_record_response("db.internal.corp", Ipv4Addr::new(10, 0, 0, 7), 300); + let rule_upstream = crate::testutil::mock_upstream(rule_resp).await; + + let mut ctx = crate::testutil::test_ctx().await; + ctx.upstream_mode = UpstreamMode::Recursive; + ctx.forwarding_rules = vec![ForwardingRule::new( + "corp".to_string(), + UpstreamPool::new(vec![Upstream::Udp(rule_upstream)], vec![]), + )]; + // No root_hints — recursion would fail immediately, proving that + // the rule branch fired instead. + let ctx = Arc::new(ctx); + + refresh_entry(&ctx, "db.internal.corp", QueryType::A).await; + + let cached = ctx + .cache + .read() + .unwrap() + .lookup("db.internal.corp", QueryType::A) + .expect("recursive-mode refresh must still consult forwarding rules"); + match &cached.answers[0] { + DnsRecord::A { addr, .. } => assert_eq!(*addr, Ipv4Addr::new(10, 0, 0, 7)), + other => panic!("expected A record, got {:?}", other), + } + } } diff --git a/src/testutil.rs b/src/testutil.rs index fab861b..2bb8aa5 100644 --- a/src/testutil.rs +++ b/src/testutil.rs @@ -12,11 +12,13 @@ use crate::cache::DnsCache; use crate::config::UpstreamMode; use crate::ctx::ServerCtx; use crate::forward::{Upstream, UpstreamPool}; +use crate::header::ResultCode; use crate::health::HealthMeta; use crate::lan::PeerStore; use crate::override_store::OverrideStore; use crate::packet::DnsPacket; use crate::query_log::QueryLog; +use crate::record::DnsRecord; use crate::service_store::ServiceStore; use crate::srtt::SrttCache; use crate::stats::ServerStats; @@ -67,6 +69,20 @@ pub async fn test_ctx() -> ServerCtx { } } +/// Build a NOERROR response containing a single A record — the shape used +/// repeatedly by pipeline/forwarding tests to seed `mock_upstream`. +pub fn a_record_response(domain: &str, addr: Ipv4Addr, ttl: u32) -> DnsPacket { + let mut pkt = DnsPacket::new(); + pkt.header.response = true; + pkt.header.rescode = ResultCode::NOERROR; + pkt.answers.push(DnsRecord::A { + domain: domain.to_string(), + addr, + ttl, + }); + pkt +} + /// Spawn a UDP socket that replies to the first DNS query with the given /// response packet (patching the query ID to match). Returns the socket address. pub async fn mock_upstream(response: DnsPacket) -> SocketAddr { -- 2.34.1 From 63a2d262766ef8dddc25b093274e251810861eb3 Mon Sep 17 00:00:00 2001 From: Krtek Zee Date: Fri, 24 Apr 2026 17:42:32 -0700 Subject: [PATCH 139/139] fix: title alignment --- src/serve.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/serve.rs b/src/serve.rs index e20ebe8..70401cc 100644 --- a/src/serve.rs +++ b/src/serve.rs @@ -342,12 +342,13 @@ pub async fn run(config_path: String) -> crate::Result<()> { }; // Title row: center within the box + let tag_line = "DNS that governs itself"; let title = format!( - "{b}NUMA{r} {it}DNS that governs itself{r} {d}v{}{r}", + "{b}NUMA{r} {it}{tag_line}{r} {d}v{}{r}", env!("CARGO_PKG_VERSION") ); // The title contains ANSI codes; visible length is ~38 chars. Pad to fill the box. - let title_visible_len = 4 + 2 + 24 + 2 + 1 + env!("CARGO_PKG_VERSION").len() + 1; + let title_visible_len = 4 + 2 + tag_line.len() + 2 + 1 + env!("CARGO_PKG_VERSION").len() + 1; let title_pad = w.saturating_sub(title_visible_len); eprintln!("\n{o} ╔{bar_top}╗{r}"); eprint!("{o} ║{r} {title}"); -- 2.34.1