From bc26ff198dc95fa8f550a2d5933b516d6516a770 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Thu, 2 Jan 2025 12:18:23 +0100 Subject: [PATCH 01/52] add UdpSocket, Quic listener and basic Quic connection --- pingora-core/src/listeners/l4.rs | 140 +++++++++++++++++++++- pingora-core/src/listeners/mod.rs | 7 +- pingora-core/src/protocols/l4/listener.rs | 31 +++++ pingora-core/src/protocols/l4/mod.rs | 1 + pingora-core/src/protocols/l4/quic.rs | 92 ++++++++++++++ pingora-core/src/protocols/l4/stream.rs | 37 ++++++ pingora-core/tests/test_basic.rs | 11 ++ pingora-core/tests/utils/mod.rs | 2 + 8 files changed, 315 insertions(+), 6 deletions(-) create mode 100644 pingora-core/src/protocols/l4/quic.rs diff --git a/pingora-core/src/listeners/l4.rs b/pingora-core/src/listeners/l4.rs index c83232044..2ce2fbb87 100644 --- a/pingora-core/src/listeners/l4.rs +++ b/pingora-core/src/listeners/l4.rs @@ -27,17 +27,18 @@ use std::os::unix::net::UnixListener as StdUnixListener; #[cfg(windows)] use std::os::windows::io::{AsRawSocket, FromRawSocket}; use std::time::Duration; -use tokio::net::TcpSocket; +use tokio::net::{TcpSocket, UdpSocket}; use crate::protocols::l4::ext::{set_dscp, set_tcp_fastopen_backlog}; use crate::protocols::l4::listener::Listener; +use crate::protocols::l4::quic::Listener as QuicListener; pub use crate::protocols::l4::stream::Stream; use crate::protocols::TcpKeepalive; #[cfg(unix)] use crate::server::ListenFds; -const TCP_LISTENER_MAX_TRY: usize = 30; -const TCP_LISTENER_TRY_STEP: Duration = Duration::from_secs(1); +const LISTENER_MAX_TRY: usize = 30; +const LISTENER_TRY_STEP: Duration = Duration::from_secs(1); // TODO: configurable backlog const LISTENER_BACKLOG: u32 = 65535; @@ -45,14 +46,22 @@ const LISTENER_BACKLOG: u32 = 65535; #[derive(Clone, Debug)] pub enum ServerAddress { Tcp(String, Option), + Udp(String, Option, ServerProtocol), #[cfg(unix)] Uds(String, Option), } +#[derive(Clone, Debug)] +pub enum ServerProtocol { + // e.g. 
raw UDP, QUIC flavours/implementations/versions + Quic, +} + impl AsRef for ServerAddress { fn as_ref(&self) -> &str { match &self { Self::Tcp(l, _) => l, + Self::Udp(l, _, _) => l, #[cfg(unix)] Self::Uds(l, _) => l, } @@ -89,6 +98,21 @@ pub struct TcpSocketOptions { // TODO: allow configuring reuseaddr, backlog, etc. from here? } +/// UDP socket configuration options, this is used for setting options on +/// listening sockets. +#[non_exhaustive] +#[derive(Clone, Debug, Default)] +pub struct UdpSocketOptions { + /// IPV6_V6ONLY flag (if true, limit socket to IPv6 communication only). + /// This is mostly useful when binding to `[::]`, which on most Unix distributions + /// will bind to both IPv4 and IPv6 addresses by default. + pub ipv6_only: Option, + /// Specifies the server should set the following DSCP value on outgoing connections. + /// See the [RFC](https://datatracker.ietf.org/doc/html/rfc2474) for more details. + pub dscp: Option, + // TODO: allow configuring reuseaddr, backlog, etc. from here? 
+} + #[cfg(unix)] mod uds { use super::{OrErr, Result}; @@ -172,8 +196,48 @@ fn apply_tcp_socket_options(sock: &TcpSocket, opt: Option<&TcpSocketOptions>) -> Ok(()) } +// currently, these options can only apply on sockets prior to calling bind() +fn apply_udp_socket_options( + socket_ref: &socket2::Socket, + opt: Option<&UdpSocketOptions>, +) -> Result<()> { + let Some(opt) = opt else { + return Ok(()); + }; + + if let Some(ipv6_only) = opt.ipv6_only { + socket_ref + .set_only_v6(ipv6_only) + .or_err(BindError, "failed to set IPV6_V6ONLY")?; + } + + #[cfg(unix)] + let raw = socket_ref.as_raw_fd(); + #[cfg(windows)] + let raw = socket_ref.as_raw_socket(); + + if let Some(dscp) = opt.dscp { + set_dscp(raw, dscp)?; + } + Ok(()) +} + fn from_raw_fd(address: &ServerAddress, fd: i32) -> Result { match address { + ServerAddress::Udp(_, _, proto) => { + #[cfg(unix)] + let std_listener_socket = unsafe { std::net::UdpSocket::from_raw_fd(fd) }; + #[cfg(windows)] + let std_listener_socket = unsafe { std::net::UdpSocket::from_raw_socket(fd as u64) }; + + match proto { + ServerProtocol::Quic => { + let socket = UdpSocket::from_std(std_listener_socket) + .or_err_with(BindError, || format!("Listen() failed on {address:?}"))?; + Ok(QuicListener::from(socket).into()) + } + } + } #[cfg(unix)] ServerAddress::Uds(addr, perm) => { let std_listener = unsafe { StdUnixListener::from_raw_fd(fd) }; @@ -233,13 +297,69 @@ async fn bind_tcp(addr: &str, opt: Option) -> Result break Err(e).or_err_with(BindError, || format!("bind() failed on {addr}")); } try_count += 1; - if try_count >= TCP_LISTENER_MAX_TRY { + if try_count >= LISTENER_MAX_TRY { + break Err(e).or_err_with(BindError, || { + format!("bind() failed, after retries, {addr} still in use") + }); + } + warn!("{addr} is in use, will try again"); + tokio::time::sleep(LISTENER_TRY_STEP).await; + } + } + } +} + +async fn bind_udp_socket(addr: &str, opt: Option) -> Result { + let mut try_count = 0; + loop { + let sock_addr = addr + 
.to_socket_addrs() // NOTE: this could invoke a blocking network lookup + .or_err_with(BindError, || format!("Invalid listen address {addr}"))? + .next() // take the first one for now + .unwrap(); // assume there is always at least one + + let ty = socket2::Type::DGRAM; + let listener_socket = match sock_addr { + SocketAddr::V4(_) => socket2::Socket::new( + socket2::Domain::IPV4, + ty.nonblocking(), + Some(socket2::Protocol::UDP), + ), + SocketAddr::V6(_) => socket2::Socket::new( + socket2::Domain::IPV6, + ty.nonblocking(), + Some(socket2::Protocol::UDP), + ), + } + .or_err_with(BindError, || format!("fail to create address {sock_addr}"))?; + + // NOTE: this is to preserve the current UdpListener::bind() behavior. + // We have a few tests relying on this behavior to allow multiple identical + // test servers to coexist. + listener_socket + .set_reuse_address(true) + .or_err(BindError, "fail to set_reuseaddr(true)")?; + + apply_udp_socket_options(&listener_socket, opt.as_ref())?; + + listener_socket + .set_nonblocking(true) // required using tokio::net::UdpSocket::from_std(socket) + .or_err(BindError, "fail to set_nonblocking(true)")?; + + match listener_socket.bind(&(sock_addr.into())) { + Ok(()) => break Ok(listener_socket.into()), + Err(e) => { + if e.kind() != ErrorKind::AddrInUse { + break Err(e).or_err_with(BindError, || format!("bind() failed on {addr}")); + } + try_count += 1; + if try_count >= LISTENER_MAX_TRY { break Err(e).or_err_with(BindError, || { format!("bind() failed, after retries, {addr} still in use") }); } warn!("{addr} is in use, will try again"); - tokio::time::sleep(TCP_LISTENER_TRY_STEP).await; + tokio::time::sleep(LISTENER_TRY_STEP).await; } } } @@ -250,6 +370,16 @@ async fn bind(addr: &ServerAddress) -> Result { #[cfg(unix)] ServerAddress::Uds(l, perm) => uds::bind(l, perm.clone()), ServerAddress::Tcp(l, opt) => bind_tcp(l, opt.clone()).await, + ServerAddress::Udp(l, opt, proto) => match proto { + ServerProtocol::Quic => { + let std_socket 
= bind_udp_socket(l, opt.clone()) + .await + .or_err(BindError, "bind() failed")?; + let tokio_socket = UdpSocket::try_from(std_socket) + .or_err(BindError, "failed to create UdpSocket")?; + Ok(Listener::from(QuicListener::from(tokio_socket))) + } + }, } } diff --git a/pingora-core/src/listeners/mod.rs b/pingora-core/src/listeners/mod.rs index 82c31cc7e..5a0da3e45 100644 --- a/pingora-core/src/listeners/mod.rs +++ b/pingora-core/src/listeners/mod.rs @@ -31,7 +31,7 @@ use async_trait::async_trait; use pingora_error::Result; use std::{fs::Permissions, sync::Arc}; -use l4::{ListenerEndpoint, Stream as L4Stream}; +use l4::{ListenerEndpoint, ServerProtocol, Stream as L4Stream}; use tls::{Acceptor, TlsSettings}; pub use crate::protocols::tls::ALPN; @@ -154,6 +154,11 @@ impl Listeners { Ok(listeners) } + /// Add a QUIC endpoint to `self`. + pub fn add_quic(&mut self, addr: &str) { + self.add_address(ServerAddress::Udp(addr.into(), None, ServerProtocol::Quic)); + } + /// Add a TCP endpoint to `self`. 
pub fn add_tcp(&mut self, addr: &str) { self.add_address(ServerAddress::Tcp(addr.into(), None)); diff --git a/pingora-core/src/protocols/l4/listener.rs b/pingora-core/src/protocols/l4/listener.rs index d62f7f0ca..2bcefe682 100644 --- a/pingora-core/src/protocols/l4/listener.rs +++ b/pingora-core/src/protocols/l4/listener.rs @@ -24,16 +24,24 @@ use tokio::net::TcpListener; use tokio::net::UnixListener; use crate::protocols::digest::{GetSocketDigest, SocketDigest}; +use crate::protocols::l4::quic::Listener as QuicListener; use crate::protocols::l4::stream::Stream; /// The type for generic listener for both TCP and Unix domain socket #[derive(Debug)] pub enum Listener { + Quic(QuicListener), Tcp(TcpListener), #[cfg(unix)] Unix(UnixListener), } +impl From for Listener { + fn from(s: QuicListener) -> Self { + Self::Quic(s) + } +} + impl From for Listener { fn from(s: TcpListener) -> Self { Self::Tcp(s) @@ -51,6 +59,7 @@ impl From for Listener { impl AsRawFd for Listener { fn as_raw_fd(&self) -> std::os::unix::io::RawFd { match &self { + Self::Quic(l) => l.get_raw_fd(), Self::Tcp(l) => l.as_raw_fd(), Self::Unix(l) => l.as_raw_fd(), } @@ -69,7 +78,29 @@ impl AsRawSocket for Listener { impl Listener { /// Accept a connection from the listening endpoint pub async fn accept(&self) -> io::Result { + // TODO: changing to &mut self would help to simplify connection state locks for Quic + // not required for TCP/UDS, feasible to change to unique (mut) access? 
match &self { + Self::Quic(l) => { + // TODO: update digest when peer_addr changes; + // a Quic connection supports IP address switching; + // for multi-path a primary peer_addr needs to be selected + l.accept().await.map(|(stream, peer_addr)| { + let mut s: Stream = stream.into(); + + #[cfg(unix)] + let digest = SocketDigest::from_raw_fd(s.as_raw_fd()); + #[cfg(windows)] + let digest = SocketDigest::from_raw_socket(stream.as_raw_socket()); + + digest + .peer_addr + .set(Some(peer_addr.into())) + .expect("newly created OnceCell must be empty"); + s.set_socket_digest(digest); + s + }) + } Self::Tcp(l) => l.accept().await.map(|(stream, peer_addr)| { let mut s: Stream = stream.into(); #[cfg(unix)] diff --git a/pingora-core/src/protocols/l4/mod.rs b/pingora-core/src/protocols/l4/mod.rs index cfa65e03b..9223471fe 100644 --- a/pingora-core/src/protocols/l4/mod.rs +++ b/pingora-core/src/protocols/l4/mod.rs @@ -16,5 +16,6 @@ pub mod ext; pub mod listener; +pub mod quic; pub mod socket; pub mod stream; diff --git a/pingora-core/src/protocols/l4/quic.rs b/pingora-core/src/protocols/l4/quic.rs new file mode 100644 index 000000000..1adf55525 --- /dev/null +++ b/pingora-core/src/protocols/l4/quic.rs @@ -0,0 +1,92 @@ +use crate::protocols::l4::stream::Stream as L4Stream; +use std::fmt::{Debug, Formatter}; +use std::io; +use std::io::Error; +use std::net::SocketAddr; +use std::os::fd::{AsRawFd, RawFd}; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; +use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; +use tokio::net::UdpSocket; + +pub struct Listener { + io: Arc, +} + +impl From for Listener { + fn from(io: UdpSocket) -> Self { + Listener { io: Arc::new(io) } + } +} + +impl Listener { + pub(crate) async fn accept(&self) -> io::Result<(L4Stream, SocketAddr)> { + // TODO: SocketAddr should be remote addr + let addr = self.io.local_addr()?; + + Ok(( + QuicConnection { + io: self.io.clone(), + } + .into(), + addr, + )) + } + + pub(super) fn get_raw_fd(&self) 
-> RawFd { + self.io.as_raw_fd() + } +} + +impl AsRawFd for QuicConnection { + fn as_raw_fd(&self) -> RawFd { + self.io.as_raw_fd() + } +} + +impl Debug for Listener { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Listener").field("io", &self.io).finish() + } +} + +pub(crate) struct QuicConnection { + pub(crate) io: Arc, +} + +impl Debug for QuicConnection { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("QuicConnection").finish() + } +} + +#[allow(unused_variables)] // TODO: remove +impl AsyncWrite for QuicConnection { + fn poll_write( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + todo!() + } + + fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + todo!() + } + + fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + todo!() + } +} + +#[allow(unused_variables)] // TODO: remove +impl AsyncRead for QuicConnection { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + todo!() + } +} diff --git a/pingora-core/src/protocols/l4/stream.rs b/pingora-core/src/protocols/l4/stream.rs index e561908eb..e149f36d2 100644 --- a/pingora-core/src/protocols/l4/stream.rs +++ b/pingora-core/src/protocols/l4/stream.rs @@ -37,6 +37,7 @@ use tokio::net::TcpStream; use tokio::net::UnixStream; use crate::protocols::l4::ext::{set_tcp_keepalive, TcpKeepalive}; +use crate::protocols::l4::quic::QuicConnection; use crate::protocols::raw_connect::ProxyDigest; use crate::protocols::{ GetProxyDigest, GetSocketDigest, GetTimingDigest, Peek, Shutdown, SocketDigest, Ssl, @@ -46,6 +47,7 @@ use crate::upstreams::peer::Tracer; #[derive(Debug)] enum RawStream { + Quic(QuicConnection), Tcp(TcpStream), #[cfg(unix)] Unix(UnixStream), @@ -60,6 +62,7 @@ impl AsyncRead for RawStream { // Safety: Basic enum pin projection unsafe { match &mut Pin::get_unchecked_mut(self) { + RawStream::Quic(s) => 
Pin::new_unchecked(s).poll_read(cx, buf), RawStream::Tcp(s) => Pin::new_unchecked(s).poll_read(cx, buf), #[cfg(unix)] RawStream::Unix(s) => Pin::new_unchecked(s).poll_read(cx, buf), @@ -73,6 +76,7 @@ impl AsyncWrite for RawStream { // Safety: Basic enum pin projection unsafe { match &mut Pin::get_unchecked_mut(self) { + RawStream::Quic(s) => Pin::new_unchecked(s).poll_write(cx, buf), RawStream::Tcp(s) => Pin::new_unchecked(s).poll_write(cx, buf), #[cfg(unix)] RawStream::Unix(s) => Pin::new_unchecked(s).poll_write(cx, buf), @@ -84,6 +88,7 @@ impl AsyncWrite for RawStream { // Safety: Basic enum pin projection unsafe { match &mut Pin::get_unchecked_mut(self) { + RawStream::Quic(s) => Pin::new_unchecked(s).poll_flush(cx), RawStream::Tcp(s) => Pin::new_unchecked(s).poll_flush(cx), #[cfg(unix)] RawStream::Unix(s) => Pin::new_unchecked(s).poll_flush(cx), @@ -95,6 +100,7 @@ impl AsyncWrite for RawStream { // Safety: Basic enum pin projection unsafe { match &mut Pin::get_unchecked_mut(self) { + RawStream::Quic(s) => Pin::new_unchecked(s).poll_shutdown(cx), RawStream::Tcp(s) => Pin::new_unchecked(s).poll_shutdown(cx), #[cfg(unix)] RawStream::Unix(s) => Pin::new_unchecked(s).poll_shutdown(cx), @@ -110,6 +116,7 @@ impl AsyncWrite for RawStream { // Safety: Basic enum pin projection unsafe { match &mut Pin::get_unchecked_mut(self) { + RawStream::Quic(s) => Pin::new_unchecked(s).poll_write_vectored(cx, bufs), RawStream::Tcp(s) => Pin::new_unchecked(s).poll_write_vectored(cx, bufs), #[cfg(unix)] RawStream::Unix(s) => Pin::new_unchecked(s).poll_write_vectored(cx, bufs), @@ -119,6 +126,7 @@ impl AsyncWrite for RawStream { fn is_write_vectored(&self) -> bool { match self { + RawStream::Quic(s) => s.is_write_vectored(), RawStream::Tcp(s) => s.is_write_vectored(), #[cfg(unix)] RawStream::Unix(s) => s.is_write_vectored(), @@ -130,6 +138,7 @@ impl AsyncWrite for RawStream { impl AsRawFd for RawStream { fn as_raw_fd(&self) -> std::os::unix::io::RawFd { match self { + RawStream::Quic(s) 
=> s.as_raw_fd(), RawStream::Tcp(s) => s.as_raw_fd(), RawStream::Unix(s) => s.as_raw_fd(), } @@ -211,6 +220,7 @@ impl AsyncRead for RawStreamWrapper { unsafe { let rs_wrapper = Pin::get_unchecked_mut(self); match &mut rs_wrapper.stream { + RawStream::Quic(s) => return Pin::new_unchecked(s).poll_read(cx, buf), RawStream::Tcp(s) => return Pin::new_unchecked(s).poll_read(cx, buf), RawStream::Unix(s) => return Pin::new_unchecked(s).poll_read(cx, buf), } @@ -220,6 +230,7 @@ impl AsyncRead for RawStreamWrapper { // Safety: Basic pin projection to get mutable stream let rs_wrapper = unsafe { Pin::get_unchecked_mut(self) }; match &mut rs_wrapper.stream { + RawStream::Quic(s) => unsafe { Pin::new_unchecked(s).poll_read(cx, buf) }, RawStream::Tcp(s) => { loop { ready!(s.poll_read_ready(cx))?; @@ -273,6 +284,7 @@ impl AsyncWrite for RawStreamWrapper { // Safety: Basic enum pin projection unsafe { match &mut Pin::get_unchecked_mut(self).stream { + RawStream::Quic(s) => Pin::new_unchecked(s).poll_write(cx, buf), RawStream::Tcp(s) => Pin::new_unchecked(s).poll_write(cx, buf), #[cfg(unix)] RawStream::Unix(s) => Pin::new_unchecked(s).poll_write(cx, buf), @@ -284,6 +296,7 @@ impl AsyncWrite for RawStreamWrapper { // Safety: Basic enum pin projection unsafe { match &mut Pin::get_unchecked_mut(self).stream { + RawStream::Quic(s) => Pin::new_unchecked(s).poll_flush(cx), RawStream::Tcp(s) => Pin::new_unchecked(s).poll_flush(cx), #[cfg(unix)] RawStream::Unix(s) => Pin::new_unchecked(s).poll_flush(cx), @@ -295,6 +308,7 @@ impl AsyncWrite for RawStreamWrapper { // Safety: Basic enum pin projection unsafe { match &mut Pin::get_unchecked_mut(self).stream { + RawStream::Quic(s) => Pin::new_unchecked(s).poll_shutdown(cx), RawStream::Tcp(s) => Pin::new_unchecked(s).poll_shutdown(cx), #[cfg(unix)] RawStream::Unix(s) => Pin::new_unchecked(s).poll_shutdown(cx), @@ -310,6 +324,7 @@ impl AsyncWrite for RawStreamWrapper { // Safety: Basic enum pin projection unsafe { match &mut 
Pin::get_unchecked_mut(self).stream { + RawStream::Quic(s) => Pin::new_unchecked(s).poll_write_vectored(cx, bufs), RawStream::Tcp(s) => Pin::new_unchecked(s).poll_write_vectored(cx, bufs), #[cfg(unix)] RawStream::Unix(s) => Pin::new_unchecked(s).poll_write_vectored(cx, bufs), @@ -414,6 +429,27 @@ impl Stream { } } +impl From for Stream { + fn from(s: QuicConnection) -> Self { + Stream { + stream: BufStream::with_capacity( + BUF_READ_SIZE, + BUF_WRITE_SIZE, + RawStreamWrapper::new(RawStream::Quic(s)), + ), + rewind_read_buf: Vec::new(), + buffer_write: true, + established_ts: SystemTime::now(), + proxy_digest: None, + socket_digest: None, + tracer: None, + read_pending_time: AccumulatedDuration::new(), + write_pending_time: AccumulatedDuration::new(), + rx_ts: None, + } + } +} + impl From for Stream { fn from(s: TcpStream) -> Self { Stream { @@ -552,6 +588,7 @@ impl Drop for Stream { } /* use nodelay/local_addr function to detect socket status */ let ret = match &self.stream.get_ref().stream { + RawStream::Quic(s) => s.io.local_addr().err(), RawStream::Tcp(s) => s.nodelay().err(), #[cfg(unix)] RawStream::Unix(s) => s.local_addr().err(), diff --git a/pingora-core/tests/test_basic.rs b/pingora-core/tests/test_basic.rs index 32954de92..cc1e3f88a 100644 --- a/pingora-core/tests/test_basic.rs +++ b/pingora-core/tests/test_basic.rs @@ -60,3 +60,14 @@ async fn test_uds() { let res = client.get(url).await.unwrap(); assert_eq!(res.status(), reqwest::StatusCode::OK); } + +#[tokio::test] +async fn test_udp() { + use log::info; + use std::time::Duration; + + utils::init(); + + info!("Startup completed.."); + tokio::time::sleep(Duration::from_secs(300)).await; +} diff --git a/pingora-core/tests/utils/mod.rs b/pingora-core/tests/utils/mod.rs index a832c4a07..faba93e1d 100644 --- a/pingora-core/tests/utils/mod.rs +++ b/pingora-core/tests/utils/mod.rs @@ -86,6 +86,8 @@ fn entry_point(opt: Option) { tls_settings.enable_h2(); listeners.add_tls_with_settings("0.0.0.0:6146", None, 
tls_settings); + listeners.add_quic("0.0.0.0:6147"); + let echo_service_http = Service::with_listeners("Echo Service HTTP".to_string(), listeners, EchoApp); From f3a40f0db04ca8b330f95424f6b8038e171f7d51 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Thu, 2 Jan 2025 15:58:54 +0100 Subject: [PATCH 02/52] add HTTP3 server session, Quic TLS handshake preparations currently add quiche dependency only on boringssl feature quiche does not link correctly with the openssl feature rustls is currently not supported in quiche the QUIC/HTTP3 change affects lots of modules and files therefore currently not using a compile-time feature --- pingora-core/Cargo.toml | 6 +- pingora-core/src/apps/mod.rs | 110 +++++- pingora-core/src/listeners/mod.rs | 6 +- pingora-core/src/protocols/http/mod.rs | 11 +- pingora-core/src/protocols/http/server.rs | 95 +++++- pingora-core/src/protocols/http/v3/mod.rs | 17 + pingora-core/src/protocols/http/v3/server.rs | 322 ++++++++++++++++++ pingora-core/src/protocols/l4/quic.rs | 116 ++++++- pingora-core/src/protocols/l4/stream.rs | 17 +- pingora-core/src/protocols/mod.rs | 16 +- .../protocols/tls/boringssl_openssl/stream.rs | 4 +- .../src/protocols/tls/noop_tls/mod.rs | 5 +- .../src/protocols/tls/rustls/stream.rs | 4 +- pingora-proxy/src/subrequest.rs | 6 +- pingora-proxy/tests/utils/server_utils.rs | 3 +- 15 files changed, 687 insertions(+), 51 deletions(-) create mode 100644 pingora-core/src/protocols/http/v3/mod.rs create mode 100644 pingora-core/src/protocols/http/v3/server.rs diff --git a/pingora-core/Cargo.toml b/pingora-core/Cargo.toml index 350be2f89..5dc226c6f 100644 --- a/pingora-core/Cargo.toml +++ b/pingora-core/Cargo.toml @@ -67,6 +67,8 @@ zstd = "0" httpdate = "1" x509-parser = { version = "0.16.0", optional = true } ouroboros = { version = "0.18.4", optional = true } +quiche = { git = 'https://github.com/cloudflare/quiche.git', rev = "1fd4557", optional = true } +ring = { version = "0.17.8", optional = true } 
[target.'cfg(unix)'.dependencies] daemonize = "0.5.0" @@ -88,8 +90,8 @@ jemallocator = "0.5" [features] default = [] -openssl = ["pingora-openssl", "openssl_derived",] -boringssl = ["pingora-boringssl", "openssl_derived",] +openssl = ["pingora-openssl", "openssl_derived"] +boringssl = ["pingora-boringssl", "openssl_derived", "dep:quiche", "dep:ring"] rustls = ["pingora-rustls", "any_tls", "dep:x509-parser", "ouroboros"] patched_http1 = ["pingora-http/patched_http1"] openssl_derived = ["any_tls"] diff --git a/pingora-core/src/apps/mod.rs b/pingora-core/src/apps/mod.rs index 2a77a3dfa..344dd72eb 100644 --- a/pingora-core/src/apps/mod.rs +++ b/pingora-core/src/apps/mod.rs @@ -23,8 +23,9 @@ use log::{debug, error}; use std::future::poll_fn; use std::sync::Arc; -use crate::protocols::http::v2::server; -use crate::protocols::http::ServerSession; +use crate::protocols::http::v2::server as h2_server; +use crate::protocols::http::v3::server as h3_server; +use crate::protocols::http::{HttpVersion, ServerSession}; use crate::protocols::Digest; use crate::protocols::Stream; use crate::protocols::ALPN; @@ -57,12 +58,13 @@ pub trait ServerApp { /// This callback will be called once after the service stops listening to its endpoints. async fn cleanup(&self) {} } + #[non_exhaustive] #[derive(Default)] /// HTTP Server options that control how the server handles some transport types. pub struct HttpServerOptions { - /// Use HTTP/2 for plaintext. - pub h2c: bool, + /// HTTP version to use. + pub http_version: HttpVersion, } /// This trait defines the interface of an HTTP application. @@ -84,7 +86,15 @@ pub trait HttpServerApp { /// every time a new HTTP/2 **connection** needs to be established. /// /// A `None` means to use the built-in default options. See [`server::H2Options`] for more details. - fn h2_options(&self) -> Option { + fn h2_options(&self) -> Option { + None + } + + /// Provide options on how HTTP/3 connection should be established. 
This function will be called + /// every time a new HTTP/3 **connection** needs to be established. + /// + /// A `None` means to use the built-in default options. See [`h3_server::H3Options`] for more details. + fn h3_options(&self) -> Option<&h3_server::H3Options> { + None } @@ -109,10 +119,17 @@ where mut stream: Stream, shutdown: &ShutdownWatch, ) -> Option { - let mut h2c = self.server_options().as_ref().map_or(false, |o| o.h2c); + let mut http_version = self + .server_options() + .as_ref() + .map_or(HttpVersion::V1, |o| o.http_version); + + if stream.quic_connection_state().is_some() { + http_version = HttpVersion::V3; + } // try to read h2 preface - if h2c { + if matches!(http_version, HttpVersion::V2) { let mut buf = [0u8; H2_PREFACE.len()]; let peeked = stream .try_peek(&mut buf) @@ -126,10 +143,30 @@ where // not all streams support peeking if peeked { // turn off h2c (use h1) if h2 preface doesn't exist - h2c = buf == H2_PREFACE; + http_version = match buf == H2_PREFACE { + true => HttpVersion::V2, + false => HttpVersion::V1, + }; } } - if h2c || matches!(stream.selected_alpn_proto(), Some(ALPN::H2)) { + + // TODO: logic for Http3 to Http2/1 fallback. Requires Http2/1 listener being present. + if matches!(http_version, HttpVersion::V3) + && (matches!(stream.selected_alpn_proto(), Some(ALPN::H1)) + || matches!(stream.selected_alpn_proto(), Some(ALPN::H2)) + || matches!(stream.selected_alpn_proto(), Some(ALPN::H2H1))) + { + error!( + "Server is configured for {:?}. Received ALPN: {}. 
\ + Fallback from Http3 to Http2/1 is currently not supported.", + http_version, + stream.selected_alpn_proto().unwrap() + ) + } + + if matches!(http_version, HttpVersion::V2) + || matches!(stream.selected_alpn_proto(), Some(ALPN::H2)) + { // create a shared connection digest let digest = Arc::new(Digest { ssl_digest: stream.get_ssl_digest(), @@ -140,7 +177,7 @@ where }); let h2_options = self.h2_options(); - let h2_conn = server::handshake(stream, h2_options).await; + let h2_conn = h2_server::handshake(stream, h2_options).await; let mut h2_conn = match h2_conn { Err(e) => { error!("H2 handshake error {e}"); @@ -160,7 +197,7 @@ where .await.map_err(|e| error!("H2 error waiting for shutdown {e}")); return None; } - h2_stream = server::HttpSession::from_h2_conn(&mut h2_conn, digest.clone()) => h2_stream + h2_stream = h2_server::HttpSession::from_h2_conn(&mut h2_conn, digest.clone()) => h2_stream }; let h2_stream = match h2_stream { Err(e) => { @@ -178,6 +215,57 @@ where .await; }); } + } else if matches!(http_version, HttpVersion::V3) { + // create a shared connection digest + let digest = Arc::new(Digest { + ssl_digest: stream.get_ssl_digest(), + // TODO: log h3 handshake time + timing_digest: stream.get_timing_digest(), + proxy_digest: stream.get_proxy_digest(), + socket_digest: stream.get_socket_digest(), + }); + + let h3_options = self.h3_options(); + let h3_conn = h3_server::handshake(stream, h3_options).await; + let mut h3_conn = match h3_conn { + Err(e) => { + error!("H3 handshake error {e}"); + return None; + } + Ok(c) => c, + }; + + let mut shutdown = shutdown.clone(); + loop { + // this loop ends when the client decides to close the h3 conn + // TODO: add a timeout? + let h3_stream = tokio::select! 
{ + _ = shutdown.changed() => { + h3_conn.graceful_shutdown().await; + let _ = poll_fn(|cx| h3_conn.poll_closed(cx)) + .await.map_err(|e| error!("H3 error waiting for shutdown {e}")); + return None; + } + h3_stream = h3_server::HttpSession::from_h3_conn(&mut h3_conn, digest.clone()) => h3_stream + }; + + let h3_stream = match h3_stream { + Err(e) => { + // It is common for the client to just disconnect without properly + // closing H3. So we don't log the errors here + debug!("H3 error when accepting new stream {e}"); + return None; + } + Ok(s) => s?, // None means the connection is ready to be closed + }; + + let app = self.clone(); + let shutdown = shutdown.clone(); + pingora_runtime::current_handle().spawn(async move { + app.process_new_http(ServerSession::new_http3(h3_stream), &shutdown) + .await; + }); + } } else { // No ALPN or ALPN::H1 and h2c was not configured, fallback to HTTP/1.1 self.process_new_http(ServerSession::new_http1(stream), shutdown) diff --git a/pingora-core/src/listeners/mod.rs b/pingora-core/src/listeners/mod.rs index 5a0da3e45..e26844774 100644 --- a/pingora-core/src/listeners/mod.rs +++ b/pingora-core/src/listeners/mod.rs @@ -22,7 +22,8 @@ pub mod tls; #[cfg(not(feature = "any_tls"))] pub use crate::tls::listeners as tls; -use crate::protocols::{tls::TlsRef, Stream}; +use crate::protocols::{tls::TlsRef, ConnectionState, Stream}; +use crate::protocols::l4::quic::handshake as quic_handshake; #[cfg(unix)] use crate::server::ListenFds; @@ -112,6 +113,9 @@ impl UninitializedStream { if let Some(tls) = self.tls { let tls_stream = tls.tls_handshake(self.l4).await?; Ok(Box::new(tls_stream)) + } else if let Some(state) = self.l4.quic_connection_state() { + quic_handshake(state)?; + Ok(Box::new(self.l4)) } else { Ok(Box::new(self.l4)) } diff --git a/pingora-core/src/protocols/http/mod.rs b/pingora-core/src/protocols/http/mod.rs index 94814ebdd..ab799a2a0 100644 --- a/pingora-core/src/protocols/http/mod.rs +++ 
b/pingora-core/src/protocols/http/mod.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! HTTP/1.x and HTTP/2 implementation APIs +//! HTTP/1.x, HTTP/2 and HTTP/3 implementation APIs mod body_buffer; pub mod bridge; @@ -24,6 +24,7 @@ pub mod error_resp; pub mod server; pub mod v1; pub mod v2; +pub mod v3; pub use server::Session as ServerSession; @@ -57,3 +58,11 @@ impl HttpTask { } } } + +#[derive(Debug, Default, Copy, Clone)] +pub enum HttpVersion { + #[default] + V1, + V2, + V3, +} diff --git a/pingora-core/src/protocols/http/server.rs b/pingora-core/src/protocols/http/server.rs index 49423e935..677d8c96d 100644 --- a/pingora-core/src/protocols/http/server.rs +++ b/pingora-core/src/protocols/http/server.rs @@ -14,10 +14,11 @@ //! HTTP server session APIs -use super::error_resp; use super::v1::server::HttpSession as SessionV1; use super::v2::server::HttpSession as SessionV2; +use super::v3::server::HttpSession as SessionV3; use super::HttpTask; +use super::{error_resp, HttpVersion}; use crate::protocols::{Digest, SocketAddr, Stream}; use bytes::Bytes; use http::HeaderValue; @@ -30,6 +31,7 @@ use std::time::Duration; pub enum Session { H1(SessionV1), H2(SessionV2), + H3(SessionV3), } impl Session { @@ -43,11 +45,33 @@ impl Session { Self::H2(session) } + /// Create a new [`Session`] from an established HTTP/3 stream + pub fn new_http3(session: SessionV3) -> Self { + Self::H3(session) + } + /// Whether the session is HTTP/2. If not it is HTTP/1.x pub fn is_http2(&self) -> bool { matches!(self, Self::H2(_)) } + /// Whether the session is HTTP/3. + pub fn is_http3(&self) -> bool { + match self { + Session::H3(_) => true, + _ => false, + } + } + + /// The session HTTP version. + pub fn http_version(&self) -> HttpVersion { + match self { + Session::H1(_) => HttpVersion::V1, + Session::H2(_) => HttpVersion::V2, + Session::H3(_) => HttpVersion::V3, + } + } + /// Read the request header. 
This method is required to be called first before doing anything /// else with the session. /// - `Ok(true)`: successful @@ -61,6 +85,7 @@ impl Session { } // This call will always return `Ok(true)` for Http2 because the request is already read Self::H2(_) => Ok(true), + Self::H3(_) => Ok(true), } } @@ -71,6 +96,7 @@ impl Session { match self { Self::H1(s) => s.req_header(), Self::H2(s) => s.req_header(), + Self::H3(s) => s.req_header(), } } @@ -81,6 +107,7 @@ impl Session { match self { Self::H1(s) => s.req_header_mut(), Self::H2(s) => s.req_header_mut(), + Self::H3(s) => s.req_header_mut(), } } @@ -103,6 +130,7 @@ impl Session { match self { Self::H1(s) => s.read_body_bytes().await, Self::H2(s) => s.read_body_bytes().await, + Self::H3(s) => s.read_body_bytes().await, } } @@ -116,6 +144,7 @@ impl Session { Ok(()) } Self::H2(s) => s.write_response_header(resp, false), + Self::H3(s) => s.write_response_header(resp, false).await, } } @@ -127,6 +156,7 @@ impl Session { Ok(()) } Self::H2(s) => s.write_response_header_ref(resp, false), + Self::H3(s) => s.write_response_header_ref(resp, false).await, } } @@ -144,6 +174,7 @@ impl Session { Ok(()) } Self::H2(s) => s.write_body(data, end), + Self::H3(s) => s.write_body(data, end).await, } } @@ -152,6 +183,7 @@ impl Session { match self { Self::H1(_) => Ok(()), // TODO: support trailers for h1 Self::H2(s) => s.write_trailers(trailers), + Self::H3(s) => s.write_trailers(trailers), } } @@ -169,6 +201,10 @@ impl Session { s.finish()?; Ok(None) } + Self::H3(mut s) => { + s.finish().await?; + Ok(None) + } } } @@ -176,15 +212,17 @@ impl Session { match self { Self::H1(s) => s.response_duplex_vec(tasks).await, Self::H2(s) => s.response_duplex_vec(tasks), + Self::H3(s) => s.response_duplex_vec(tasks).await, } } /// Set connection reuse. `duration` defines how long the connection is kept open for the next - /// request to reuse. Noop for h2 + /// request to reuse. 
Noop for h2/h3 pub fn set_keepalive(&mut self, duration: Option) { match self { Self::H1(s) => s.set_server_keepalive(duration), Self::H2(_) => {} + Self::H3(_) => {} } } @@ -192,11 +230,12 @@ impl Session { /// to write to the stream after `duration`. If a `min_send_rate` is /// configured then the `min_send_rate` calculated timeout has higher priority. /// - /// This is a noop for h2. + /// This is a noop for h2/h3. pub fn set_write_timeout(&mut self, timeout: Duration) { match self { Self::H1(s) => s.set_write_timeout(timeout), Self::H2(_) => {} + Self::H3(_) => {} } } @@ -209,11 +248,12 @@ impl Session { /// Calculated write timeout is guaranteed to be at least 1s if `min_send_rate` /// is greater than zero, a send rate of zero is a noop. /// - /// This is a noop for h2. + /// This is a noop for h2/h3. pub fn set_min_send_rate(&mut self, rate: usize) { match self { Self::H1(s) => s.set_min_send_rate(rate), Self::H2(_) => {} + Self::H3(_) => {} } } @@ -227,6 +267,7 @@ impl Session { match self { Self::H1(s) => s.set_ignore_info_resp(ignore), Self::H2(_) => {} // always ignored + Self::H3(_) => {} // TODO: check if there is a need for an implementation } } @@ -236,6 +277,7 @@ impl Session { match self { Self::H1(s) => s.request_summary(), Self::H2(s) => s.request_summary(), + Self::H3(s) => s.request_summary(), } } @@ -245,16 +287,20 @@ impl Session { match self { Self::H1(s) => s.response_written(), Self::H2(s) => s.response_written(), + Self::H3(s) => s.response_written(), } } /// Give up the http session abruptly. 
/// For H1 this will close the underlying connection - /// For H2 this will send RESET frame to end this stream without impacting the connection + /// For H2 this will send a RESET frame to end this stream + /// For H3 this will send a RESET_STREAM QUIC frame on the underlying QUIC stream + /// For H2 & H3 a call has no impact on the connection pub async fn shutdown(&mut self) { match self { Self::H1(s) => s.shutdown().await, Self::H2(s) => s.shutdown(), + Self::H3(s) => s.shutdown(), } } @@ -262,6 +308,7 @@ impl Session { match self { Self::H1(s) => s.get_headers_raw_bytes(), Self::H2(s) => s.pseudo_raw_h1_request_header(), + Self::H3(s) => s.pseudo_raw_h1_request_header(), } } @@ -270,6 +317,7 @@ impl Session { match self { Self::H1(s) => s.is_body_done(), Self::H2(s) => s.is_body_done(), + Self::H3(s) => s.is_body_done(), } } @@ -277,10 +325,12 @@ impl Session { /// for H1 chunked encoding, this will end the last empty chunk /// for H1 content-length, this has no effect. /// for H2, this will send an empty DATA frame with END_STREAM flag + /// for H3, this will send a FIN_STREAM frame on the underlying QUIC stream pub async fn finish_body(&mut self) -> Result<()> { match self { Self::H1(s) => s.finish_body().await.map(|_| ()), Self::H2(s) => s.finish(), + Self::H3(s) => s.finish().await, } } @@ -331,6 +381,7 @@ impl Session { match self { Self::H1(s) => s.is_body_empty(), Self::H2(s) => s.is_body_empty(), + Self::H3(s) => s.is_body_empty(), } } @@ -338,6 +389,7 @@ impl Session { match self { Self::H1(s) => s.retry_buffer_truncated(), Self::H2(s) => s.retry_buffer_truncated(), + Self::H3(s) => s.retry_buffer_truncated(), } } @@ -345,6 +397,7 @@ impl Session { match self { Self::H1(s) => s.enable_retry_buffering(), Self::H2(s) => s.enable_retry_buffering(), + Self::H3(s) => s.enable_retry_buffering(), } } @@ -352,6 +405,7 @@ impl Session { match self { Self::H1(s) => s.get_retry_buffer(), Self::H2(s) => s.get_retry_buffer(), + Self::H3(s) => s.get_retry_buffer(), 
} } @@ -361,20 +415,28 @@ impl Session { match self { Self::H1(s) => s.read_body_or_idle(no_body_expected).await, Self::H2(s) => s.read_body_or_idle(no_body_expected).await, + Self::H3(s) => s.read_body_or_idle(no_body_expected).await, } } pub fn as_http1(&self) -> Option<&SessionV1> { match self { Self::H1(s) => Some(s), - Self::H2(_) => None, + _ => None, } } pub fn as_http2(&self) -> Option<&SessionV2> { match self { - Self::H1(_) => None, Self::H2(s) => Some(s), + _ => None, + } + } + + pub fn as_http3(&self) -> Option<&SessionV3> { + match self { + Self::H3(s) => Some(s), + _ => None, } } @@ -382,10 +444,13 @@ impl Session { pub async fn write_continue_response(&mut self) -> Result<()> { match self { Self::H1(s) => s.write_continue_response().await, - Self::H2(s) => s.write_response_header( - Box::new(ResponseHeader::build(100, Some(0)).unwrap()), - false, - ), + Self::H2(s) => { + s.write_response_header(Box::new(ResponseHeader::build(100, Some(0))?), false) + } + Self::H3(s) => { + s.write_response_header(Box::new(ResponseHeader::build(100, Some(0))?), false) + .await + } } } @@ -394,6 +459,7 @@ impl Session { match self { Self::H1(s) => s.is_upgrade_req(), Self::H2(_) => false, + Self::H3(_) => false, } } @@ -402,6 +468,7 @@ impl Session { match self { Self::H1(s) => s.body_bytes_sent(), Self::H2(s) => s.body_bytes_sent(), + Self::H3(s) => s.body_bytes_sent(), } } @@ -410,6 +477,7 @@ impl Session { match self { Self::H1(s) => s.body_bytes_read(), Self::H2(s) => s.body_bytes_read(), + Self::H3(s) => s.body_bytes_read(), } } @@ -418,6 +486,7 @@ impl Session { match self { Self::H1(s) => Some(s.digest()), Self::H2(s) => s.digest(), + Self::H3(s) => s.digest(), } } @@ -428,6 +497,7 @@ impl Session { match self { Self::H1(s) => Some(s.digest_mut()), Self::H2(s) => s.digest_mut(), + Self::H3(s) => s.digest_mut(), } } @@ -436,6 +506,7 @@ impl Session { match self { Self::H1(s) => s.client_addr(), Self::H2(s) => s.client_addr(), + Self::H3(s) => s.client_addr(), } 
} @@ -444,6 +515,7 @@ impl Session { match self { Self::H1(s) => s.server_addr(), Self::H2(s) => s.server_addr(), + Self::H3(s) => s.server_addr(), } } @@ -453,6 +525,7 @@ impl Session { match self { Self::H1(s) => Some(s.stream()), Self::H2(_) => None, + Self::H3(_) => None, } } } diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs new file mode 100644 index 000000000..d2e3e318e --- /dev/null +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -0,0 +1,17 @@ +// Copyright 2024 Cloudflare, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! HTTP/3 implementation + +pub mod server; diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs new file mode 100644 index 000000000..9cff0d35a --- /dev/null +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -0,0 +1,322 @@ +// Copyright 2024 Cloudflare, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//! HTTP/3 server session + +use crate::protocols::{Digest, SocketAddr, Stream}; +use bytes::Bytes; +use http::uri::PathAndQuery; +use http::HeaderMap; +use pingora_error::Result; +use std::future::Future; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use crate::protocols::http::v1::client::http_req_header_to_wire; +use pingora_http::{RequestHeader, ResponseHeader}; + +use crate::protocols::http::HttpTask; +pub use quiche::h3::Config as H3Options; + +/// Perform HTTP/3 connection handshake with an established (QUIC) connection. +/// +/// The optional `options` allow to adjust certain HTTP/3 parameters and settings. +/// See [`H3Options`] for more details. +#[allow(unused)] // TODO: remove +pub async fn handshake(io: Stream, options: Option<&H3Options>) -> Result { + Ok(H3Connection { _l4stream: io }) +} + +pub struct H3Connection { + _l4stream: Stream, // ensure the stream will not be dropped until all sessions are +} + +impl H3Connection { + pub async fn graceful_shutdown(&mut self) { + todo!(); + } + pub fn poll_closed(&mut self, _cx: &mut Context<'_>) -> Poll> { + todo!(); + } +} + +/// HTTP/3 server session +#[allow(unused)] // TODO: remove +pub struct HttpSession { + request_header: Option, + // Remember what has been written + response_written: Option>, + + // How many (application, not wire) response body bytes have been sent so far. + body_sent: usize, + + // track if the FIN STREAM frame was already sent + // quiche::Connection::stream_send fin argument + send_ended: bool, + + // digest to record underlying connection info + digest: Arc, +} + +#[allow(unused)] // TODO: remove +impl HttpSession { + /// Create a new [`HttpSession`] from the QUIC connection. + /// This function returns a new HTTP/3 session when the provided HTTP/3 connection, `conn`, + /// establishes a new HTTP/3 stream to this server. 
+ /// + /// A [`Digest`] from the IO stream is also stored in the resulting session, since the + /// session doesn't have access to the underlying stream (and the stream itself isn't + /// accessible from the `h3::server::Connection`). + /// + /// Note: in order to handle all **existing** and new HTTP/3 sessions, the server must call + /// this function in a loop until the client decides to close the connection. + /// + /// `None` will be returned when the connection is closing so that the loop can exit. + /// + pub async fn from_h3_conn( + conn: &mut H3Connection, + digest: Arc, + ) -> Result> { + todo!(); + } + + /// The request sent from the client + /// + /// Different from its HTTP/1.X counterpart, this function never panics as the request is already + /// read when a new HTTP/3 stream is established. + pub fn req_header(&self) -> &RequestHeader { + self.request_header.as_ref().unwrap() + } + + /// A mutable reference to the request sent from the client + /// + /// Different from its HTTP/1.X counterpart, this function never panics as the request is already + /// read when a new HTTP/3 stream is established. + pub fn req_header_mut(&mut self) -> &mut RequestHeader { + self.request_header.as_mut().unwrap() + } + + /// Read request body bytes. `None` when there is no more body to read. + pub async fn read_body_bytes(&mut self) -> Result> { + todo!(); + } + + // the write_* don't have timeouts because the actual writing happens on the connection + // not here. + + /// Write the response header to the client. + /// # the `end` flag + /// `end` marks the end of this session. + /// If the `end` flag is set, no more header or body can be sent to the client. + pub async fn write_response_header( + &mut self, + mut header: Box, + end: bool, + ) -> Result<()> { + todo!(); + } + + /// Write response body to the client. See [Self::write_response_header] for how to use `end`.
+ pub async fn write_body(&mut self, data: Bytes, end: bool) -> Result<()> { + todo!(); + } + + /// Write response trailers to the client, this also closes the stream. + pub fn write_trailers(&mut self, trailers: HeaderMap) -> Result<()> { + // TODO: use async fn? + todo!(); + } + + /// Similar to [Self::write_response_header], this function takes a reference instead + pub async fn write_response_header_ref( + &mut self, + header: &ResponseHeader, + end: bool, + ) -> Result<()> { + self.write_response_header(Box::new(header.clone()), end) + .await + } + + /// Mark the session end. If no `end` flag is already set before this call, this call will + /// signal the client. Otherwise this call does nothing. + /// + /// Dropping this object without sending `end` will cause an error to the client, which will cause + /// the client to treat this session as bad or incomplete. + pub async fn finish(&mut self) -> Result<()> { + // TODO: check/validate with documentation on protocols::http::server::HttpSession + // TODO: check/validate trailer sending + todo!(); + } + + pub async fn response_duplex_vec(&mut self, tasks: Vec) -> Result { + let mut end_stream = false; + for task in tasks.into_iter() { + end_stream = match task { + HttpTask::Header(header, end) => { + self.write_response_header(header, end) + .await + .map_err(|e| e.into_down())?; + end + } + HttpTask::Body(data, end) => match data { + Some(d) => { + if !d.is_empty() { + self.write_body(d, end).await.map_err(|e| e.into_down())?; + } + end + } + None => end, + }, + HttpTask::Trailer(Some(trailers)) => { + self.write_trailers(*trailers)?; + true + } + HttpTask::Trailer(None) => true, + HttpTask::Done => true, + HttpTask::Failed(e) => { + return Err(e); + } + } || end_stream // safe guard in case `end` in tasks flips from true to false + } + if end_stream { + // no-op if finished already + self.finish().await.map_err(|e| e.into_down())?; + } + Ok(end_stream) + } + + /// Return a string `$METHOD $PATH, Host: $HOST`. 
Mostly for logging and debug purpose + pub fn request_summary(&self) -> String { + let request_header = self.req_header(); + format!( + "{} {}, Host: {}:{}", + request_header.method, + request_header + .uri + .path_and_query() + .map(PathAndQuery::as_str) + .unwrap_or_default(), + request_header.uri.host().unwrap_or_default(), + request_header + .uri + .port() + .as_ref() + .map(|port| port.as_str()) + .unwrap_or_default() + ) + } + + /// Return the written response header. `None` if it is not written yet. + pub fn response_written(&self) -> Option<&ResponseHeader> { + self.response_written.as_deref() + } + + /// Give up the stream abruptly. + /// + /// This will send a `INTERNAL_ERROR` stream error to the client + pub fn shutdown(&mut self) { + // TODO: check/validate with documentation on protocols::http::server::HttpSession + // TODO: should this set self.ended? it closes the stream which prevents further writes + todo!(); + } + + // This is a hack for pingora-proxy to create subrequests from h2 server session + // TODO: be able to convert from h3 to h1 subrequest + pub fn pseudo_raw_h1_request_header(&self) -> Bytes { + let buf = http_req_header_to_wire(self.req_header()).unwrap(); // safe, None only when version unknown + buf.freeze() + } + + /// Whether there is no more body to read + pub fn is_body_done(&self) -> bool { + todo!(); + } + + /// Whether there is any body to read. 
+ pub fn is_body_empty(&self) -> bool { + todo!(); + } + + pub fn retry_buffer_truncated(&self) -> bool { + todo!(); + } + + pub fn enable_retry_buffering(&mut self) { + todo!(); + } + + pub fn get_retry_buffer(&self) -> Option { + todo!(); + } + + /// `async fn idle() -> Result;` + /// This async fn will be pending forever until the client closes the stream/connection + /// This function is used for watching client status so that the server is able to cancel + /// its internal tasks as the client waiting for the tasks goes away + pub fn idle(&mut self) -> Idle { + Idle(self) + } + + /// Similar to `read_body_bytes()` but will be pending after Ok(None) is returned, + /// until the client closes the connection + pub async fn read_body_or_idle(&mut self, no_body_expected: bool) -> Result> { + todo!(); + } + + /// Return how many response body bytes (application, not wire) already sent downstream + pub fn body_bytes_sent(&self) -> usize { + self.body_sent + } + + /// Return how many request body bytes (application, not wire) already read from downstream + pub fn body_bytes_read(&self) -> usize { + todo!(); + } + + /// Return the [Digest] of the connection. + pub fn digest(&self) -> Option<&Digest> { + Some(&self.digest) + } + + /// Return a mutable [Digest] reference for the connection. + pub fn digest_mut(&mut self) -> Option<&mut Digest> { + Arc::get_mut(&mut self.digest) + } + + /// Return the server (local) address recorded in the connection digest. + pub fn server_addr(&self) -> Option<&SocketAddr> { + self.digest.socket_digest.as_ref().map(|d| d.local_addr())? + } + + /// Return the client (peer) address recorded in the connection digest. + pub fn client_addr(&self) -> Option<&SocketAddr> { + self.digest.socket_digest.as_ref().map(|d| d.peer_addr())? + } +} + +/// The future to poll for an idle session. +/// +/// Calling `.await` in this object will not return until the client decides to close this stream. 
+#[allow(unused)] // TODO: remove +pub struct Idle<'a>(&'a mut HttpSession); + +#[allow(unused)] // TODO: remove +impl<'a> Future for Idle<'a> { + type Output = u64; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + todo!(); + } +} diff --git a/pingora-core/src/protocols/l4/quic.rs b/pingora-core/src/protocols/l4/quic.rs index 1adf55525..f3d3d601c 100644 --- a/pingora-core/src/protocols/l4/quic.rs +++ b/pingora-core/src/protocols/l4/quic.rs @@ -1,7 +1,25 @@ +// Copyright 2024 Cloudflare, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
QUIC Listener using cloudflare/quiche + use crate::protocols::l4::stream::Stream as L4Stream; +use crate::protocols::{ConnectionState as ConnectionStateTrait, QuicConnectionState}; +use parking_lot::Mutex; +use pingora_error::{Error, ErrorType, Result}; use std::fmt::{Debug, Formatter}; use std::io; -use std::io::Error; use std::net::SocketAddr; use std::os::fd::{AsRawFd, RawFd}; use std::pin::Pin; @@ -11,23 +29,30 @@ use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; use tokio::net::UdpSocket; pub struct Listener { - io: Arc, + socket: Arc, } impl From for Listener { fn from(io: UdpSocket) -> Self { - Listener { io: Arc::new(io) } + Listener { + socket: Arc::new(io), + } } } impl Listener { - pub(crate) async fn accept(&self) -> io::Result<(L4Stream, SocketAddr)> { + pub(crate) async fn accept(&self) -> std::io::Result<(L4Stream, SocketAddr)> { // TODO: SocketAddr should be remote addr - let addr = self.io.local_addr()?; + let addr = self.socket.local_addr()?; Ok(( QuicConnection { - io: self.io.clone(), + socket: self.socket.clone(), + state: Arc::new(Mutex::new(QuicConnectionState::Incoming( + PreHandshakeState { + socket: self.socket.clone(), + }, + ))), } .into(), addr, @@ -35,24 +60,33 @@ impl Listener { } pub(super) fn get_raw_fd(&self) -> RawFd { - self.io.as_raw_fd() + self.socket.as_raw_fd() } } impl AsRawFd for QuicConnection { fn as_raw_fd(&self) -> RawFd { - self.io.as_raw_fd() + self.socket.as_raw_fd() } } impl Debug for Listener { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Listener").field("io", &self.io).finish() + f.debug_struct("Listener") + .field("io", &self.socket) + .finish() } } pub(crate) struct QuicConnection { - pub(crate) io: Arc, + socket: Arc, + state: Arc>, +} + +impl QuicConnection { + pub(crate) fn local_addr(&self) -> io::Result { + self.socket.local_addr() + } } impl Debug for QuicConnection { @@ -67,15 +101,18 @@ impl AsyncWrite for QuicConnection { self: Pin<&mut Self>, cx: &mut Context<'_>, buf: 
&[u8], - ) -> Poll> { + ) -> Poll> { todo!() } - fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { todo!() } - fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + fn poll_shutdown( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { todo!() } } @@ -86,7 +123,58 @@ impl AsyncRead for QuicConnection { self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>, - ) -> Poll> { + ) -> Poll> { todo!() } } + +#[allow(unused)] // TODO: remove +pub enum ConnectionState { + Incoming(PreHandshakeState), + Established(EstablishedState), +} + +#[allow(unused)] // TODO: remove +pub struct PreHandshakeState { + socket: Arc, +} + +#[allow(unused)] // TODO: remove +pub struct EstablishedState { + socket: Arc, +} + +impl ConnectionStateTrait for QuicConnection { + fn quic_connection_state(&self) -> Option>> { + Some(self.state.clone()) + } +} + +impl Into> for ConnectionState { + fn into(self) -> Option { + match self { + ConnectionState::Incoming(s) => Some(s), + ConnectionState::Established(_) => None, + } + } +} + +impl Into> for ConnectionState { + fn into(self) -> Option { + match self { + ConnectionState::Incoming(_) => None, + ConnectionState::Established(s) => Some(s), + } + } +} + +pub(crate) fn handshake(state: Arc>) -> Result<()> { + let state = state.lock(); + + let Some(_s) = state.into() else { + debug_assert!(false, "quic::handshake on already established connection"); + return Err(Error::new(ErrorType::HandshakeError)); + }; + + Ok(()) +} diff --git a/pingora-core/src/protocols/l4/stream.rs b/pingora-core/src/protocols/l4/stream.rs index e149f36d2..3f1fd47a1 100644 --- a/pingora-core/src/protocols/l4/stream.rs +++ b/pingora-core/src/protocols/l4/stream.rs @@ -18,6 +18,7 @@ use async_trait::async_trait; use futures::FutureExt; use log::{debug, error}; +use parking_lot::Mutex; use pingora_error::{ErrorType::*, OrErr, Result}; #[cfg(target_os 
= "linux")] use std::io::IoSliceMut; @@ -40,8 +41,8 @@ use crate::protocols::l4::ext::{set_tcp_keepalive, TcpKeepalive}; use crate::protocols::l4::quic::QuicConnection; use crate::protocols::raw_connect::ProxyDigest; use crate::protocols::{ - GetProxyDigest, GetSocketDigest, GetTimingDigest, Peek, Shutdown, SocketDigest, Ssl, - TimingDigest, UniqueID, UniqueIDType, + ConnectionState, GetProxyDigest, GetSocketDigest, GetTimingDigest, Peek, QuicConnectionState, + Shutdown, SocketDigest, Ssl, TimingDigest, UniqueID, UniqueIDType, }; use crate::upstreams::peer::Tracer; @@ -521,6 +522,16 @@ impl UniqueID for Stream { } } +impl ConnectionState for Stream { + fn quic_connection_state(&self) -> Option>> { + match &self.stream.get_ref().stream { + RawStream::Quic(s) => s.quic_connection_state(), + RawStream::Tcp(_) => None, + RawStream::Unix(_) => None, + } + } +} + impl Ssl for Stream {} #[async_trait] @@ -588,7 +599,7 @@ impl Drop for Stream { } /* use nodelay/local_addr function to detect socket status */ let ret = match &self.stream.get_ref().stream { - RawStream::Quic(s) => s.io.local_addr().err(), + RawStream::Quic(s) => s.local_addr().err(), RawStream::Tcp(s) => s.nodelay().err(), #[cfg(unix)] RawStream::Unix(s) => s.local_addr().err(), diff --git a/pingora-core/src/protocols/mod.rs b/pingora-core/src/protocols/mod.rs index 007675b3d..8c9a995f4 100644 --- a/pingora-core/src/protocols/mod.rs +++ b/pingora-core/src/protocols/mod.rs @@ -30,6 +30,7 @@ pub use l4::ext::TcpKeepalive; pub use tls::ALPN; use async_trait::async_trait; +use l4::quic::ConnectionState as QuicConnectionState; use std::fmt::Debug; use std::net::{IpAddr, Ipv4Addr}; use std::sync::Arc; @@ -52,6 +53,13 @@ pub trait UniqueID { fn id(&self) -> UniqueIDType; } +/// Interface to get the raw connection for e.g. 
non-connection based network protocols like UDP/QUIC +pub trait ConnectionState { + fn quic_connection_state(&self) -> Option>> { + None + } +} + /// Interface to get TLS info pub trait Ssl { /// Return the TLS info if the connection is over TLS @@ -90,6 +98,7 @@ pub trait IO: + AsyncWrite + Shutdown + UniqueID + + ConnectionState + Ssl + GetTimingDigest + GetProxyDigest @@ -111,6 +120,7 @@ impl< + AsyncWrite + Shutdown + UniqueID + + ConnectionState + Ssl + GetTimingDigest + GetProxyDigest @@ -149,6 +159,7 @@ mod ext_io_impl { 0 } } + impl ConnectionState for Mock {} impl Ssl for Mock {} impl GetTimingDigest for Mock { fn get_timing_digest(&self) -> Vec> { @@ -208,6 +219,7 @@ mod ext_io_impl { 0 } } + impl ConnectionState for DuplexStream {} impl Ssl for DuplexStream {} impl GetTimingDigest for DuplexStream { fn get_timing_digest(&self) -> Vec> { @@ -238,18 +250,18 @@ pub(crate) trait ConnSockReusable { fn check_sock_match(&self, sock: V) -> bool; } +use crate::protocols::tls::TlsRef; use l4::socket::SocketAddr; use log::{debug, error}; #[cfg(unix)] use nix::sys::socket::{getpeername, SockaddrStorage, UnixAddr}; +use parking_lot::Mutex; #[cfg(unix)] use std::os::unix::prelude::AsRawFd; #[cfg(windows)] use std::os::windows::io::AsRawSocket; use std::{net::SocketAddr as InetSocketAddr, path::Path}; -use crate::protocols::tls::TlsRef; - #[cfg(unix)] impl ConnFdReusable for SocketAddr { fn check_fd_match(&self, fd: V) -> bool { diff --git a/pingora-core/src/protocols/tls/boringssl_openssl/stream.rs b/pingora-core/src/protocols/tls/boringssl_openssl/stream.rs index 38c302713..50e9c4107 100644 --- a/pingora-core/src/protocols/tls/boringssl_openssl/stream.rs +++ b/pingora-core/src/protocols/tls/boringssl_openssl/stream.rs @@ -14,7 +14,7 @@ use crate::protocols::digest::TimingDigest; use crate::protocols::tls::{SslDigest, ALPN}; -use crate::protocols::{Peek, Ssl, UniqueID, UniqueIDType}; +use crate::protocols::{ConnectionState, Peek, Ssl, UniqueID, UniqueIDType}; use 
crate::tls::{self, ssl, tokio_ssl::SslStream as InnerSsl}; use crate::utils::tls::{get_organization, get_serial}; use log::warn; @@ -169,6 +169,8 @@ where } } +impl ConnectionState for SslStream {} + impl Ssl for SslStream { fn get_ssl(&self) -> Option<&ssl::SslRef> { Some(self.ssl()) diff --git a/pingora-core/src/protocols/tls/noop_tls/mod.rs b/pingora-core/src/protocols/tls/noop_tls/mod.rs index ee34a0c65..ebe63cfde 100644 --- a/pingora-core/src/protocols/tls/noop_tls/mod.rs +++ b/pingora-core/src/protocols/tls/noop_tls/mod.rs @@ -108,7 +108,8 @@ pub mod stream { use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; use crate::protocols::{ - GetProxyDigest, GetSocketDigest, GetTimingDigest, Peek, Shutdown, Ssl, UniqueID, + ConnectionState, GetProxyDigest, GetSocketDigest, GetTimingDigest, Peek, Shutdown, Ssl, + UniqueID, }; /// A TLS session over a stream. @@ -173,6 +174,8 @@ pub mod stream { } } + impl ConnectionState for SslStream {} + impl Ssl for SslStream {} impl GetTimingDigest for SslStream { diff --git a/pingora-core/src/protocols/tls/rustls/stream.rs b/pingora-core/src/protocols/tls/rustls/stream.rs index 7361bdb88..d9ac4d3bd 100644 --- a/pingora-core/src/protocols/tls/rustls/stream.rs +++ b/pingora-core/src/protocols/tls/rustls/stream.rs @@ -21,7 +21,7 @@ use std::time::{Duration, SystemTime}; use crate::listeners::tls::Acceptor; use crate::protocols::raw_connect::ProxyDigest; -use crate::protocols::{tls::SslDigest, Peek, TimingDigest, UniqueIDType}; +use crate::protocols::{tls::SslDigest, ConnectionState, Peek, TimingDigest, UniqueIDType}; use crate::protocols::{ GetProxyDigest, GetSocketDigest, GetTimingDigest, SocketDigest, Ssl, UniqueID, ALPN, }; @@ -221,6 +221,8 @@ where } } +impl ConnectionState for TlsStream {} + impl Ssl for TlsStream { fn get_ssl_digest(&self) -> Option> { self.ssl_digest() diff --git a/pingora-proxy/src/subrequest.rs b/pingora-proxy/src/subrequest.rs index 60a5c7311..573b32d56 100644 --- a/pingora-proxy/src/subrequest.rs +++ 
b/pingora-proxy/src/subrequest.rs @@ -18,8 +18,8 @@ use core::task::{Context, Poll}; use pingora_cache::lock::WritePermit; use pingora_core::protocols::raw_connect::ProxyDigest; use pingora_core::protocols::{ - GetProxyDigest, GetSocketDigest, GetTimingDigest, Peek, SocketDigest, Ssl, TimingDigest, - UniqueID, UniqueIDType, + ConnectionState, GetProxyDigest, GetSocketDigest, GetTimingDigest, Peek, SocketDigest, Ssl, + TimingDigest, UniqueID, UniqueIDType, }; use std::io::Cursor; use std::sync::Arc; @@ -74,6 +74,8 @@ impl UniqueID for DummyIO { } } +impl ConnectionState for DummyIO {} + impl Ssl for DummyIO {} impl GetTimingDigest for DummyIO { diff --git a/pingora-proxy/tests/utils/server_utils.rs b/pingora-proxy/tests/utils/server_utils.rs index 0c5a4e52f..e6cff56fc 100644 --- a/pingora-proxy/tests/utils/server_utils.rs +++ b/pingora-proxy/tests/utils/server_utils.rs @@ -29,6 +29,7 @@ use pingora_cache::{ use pingora_cache::{ForcedInvalidationKind, PurgeType, VarianceBuilder}; use pingora_core::apps::{HttpServerApp, HttpServerOptions}; use pingora_core::modules::http::compression::ResponseCompression; +use pingora_core::protocols::http::HttpVersion; use pingora_core::protocols::{l4::socket::SocketAddr, Digest}; use pingora_core::server::configuration::Opt; use pingora_core::services::Service; @@ -571,7 +572,7 @@ fn test_main() { let http_logic = proxy_service_h2c.app_logic_mut().unwrap(); let mut http_server_options = HttpServerOptions::default(); - http_server_options.h2c = true; + http_server_options.http_version = HttpVersion::V2; http_logic.server_options = Some(http_server_options); proxy_service_h2c.add_tcp("0.0.0.0:6146"); From 1ed9ff72f76de09faed80110d1854e0b86a04af7 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Fri, 3 Jan 2025 13:02:57 +0100 Subject: [PATCH 03/52] import helpers from cloudflare/quiche@0570ab83 sources cloudflare/quiche@0570ab83/apps/src/bin/quiche-server.rs cloudflare/quiche@0570ab83/apps/src/sendto.rs adjust for 
tokio::net::UdpSocket --- pingora-core/src/protocols/l4/quic/sendto.rs | 167 ++++++++++++++++++ .../protocols/l4/quic_internals/id_token.rs | 83 +++++++++ .../src/protocols/l4/quic_internals/mod.rs | 2 + 3 files changed, 252 insertions(+) create mode 100644 pingora-core/src/protocols/l4/quic/sendto.rs create mode 100644 pingora-core/src/protocols/l4/quic_internals/id_token.rs create mode 100644 pingora-core/src/protocols/l4/quic_internals/mod.rs diff --git a/pingora-core/src/protocols/l4/quic/sendto.rs b/pingora-core/src/protocols/l4/quic/sendto.rs new file mode 100644 index 000000000..f2a7d1aa2 --- /dev/null +++ b/pingora-core/src/protocols/l4/quic/sendto.rs @@ -0,0 +1,167 @@ +// Copyright (C) 2021, Cloudflare, Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +use std::cmp; +use std::io; +use std::os::fd::AsRawFd; + +/// For Linux, try to detect GSO is available. +#[cfg(target_os = "linux")] +pub fn detect_gso(socket: &tokio::net::UdpSocket, segment_size: usize) -> bool { + use nix::sys::socket::setsockopt; + use nix::sys::socket::sockopt::UdpGsoSegment; + + setsockopt(socket.as_raw_fd(), UdpGsoSegment, &(segment_size as i32)).is_ok() +} + +/// For non-Linux, there is no GSO support. +#[cfg(not(target_os = "linux"))] +pub fn detect_gso(_socket: &mio::net::UdpSocket, _segment_size: usize) -> bool { + false +} + +/// Send packets using sendmsg() with GSO. +#[cfg(target_os = "linux")] +fn send_to_gso_pacing( + socket: &tokio::net::UdpSocket, buf: &[u8], send_info: &quiche::SendInfo, + segment_size: usize, +) -> io::Result { + use nix::sys::socket::sendmsg; + use nix::sys::socket::ControlMessage; + use nix::sys::socket::MsgFlags; + use nix::sys::socket::SockaddrStorage; + use std::io::IoSlice; + use std::os::unix::io::AsRawFd; + + let iov = [IoSlice::new(buf)]; + let segment_size = segment_size as u16; + let dst = SockaddrStorage::from(send_info.to); + let sockfd = socket.as_raw_fd(); + + // GSO option. + let cmsg_gso = ControlMessage::UdpGsoSegments(&segment_size); + + // Pacing option. 
+ //let send_time = std_time_to_u64(&send_info.at); + //let cmsg_txtime = ControlMessage::TxTime(&send_time); + + match sendmsg( + sockfd, + &iov, + // &[cmsg_gso, cmsg_txtime], + &[cmsg_gso], + MsgFlags::empty(), + Some(&dst), + ) { + Ok(v) => Ok(v), + Err(e) => Err(e.into()), + } +} + +/// For non-Linux platforms. +#[cfg(not(target_os = "linux"))] +fn send_to_gso_pacing( + _socket: &mio::net::UdpSocket, _buf: &[u8], _send_info: &quiche::SendInfo, + _segment_size: usize, +) -> io::Result { + panic!("send_to_gso() should not be called on non-linux platforms"); +} + +/// A wrapper function of send_to(). +/// +/// When GSO and SO_TXTIME are enabled, send packets using send_to_gso(). +/// Otherwise, send packets using socket.send_to(). +pub async fn send_to( + socket: &tokio::net::UdpSocket, buf: &[u8], send_info: &quiche::SendInfo, + segment_size: usize, pacing: bool, enable_gso: bool, +) -> io::Result { + if pacing && enable_gso { + match send_to_gso_pacing(socket, buf, send_info, segment_size) { + Ok(v) => { + return Ok(v); + }, + Err(e) => { + return Err(e); + }, + } + } + + let mut off = 0; + let mut left = buf.len(); + let mut written = 0; + + while left > 0 { + let pkt_len = cmp::min(left, segment_size); + + match socket.send_to(&buf[off..off + pkt_len], send_info.to).await { + Ok(v) => { + written += v; + }, + Err(e) => return Err(e), + } + + off += pkt_len; + left -= pkt_len; + } + + Ok(written) +} + +#[cfg(target_os = "linux")] +fn std_time_to_u64(time: &std::time::Instant) -> u64 { + const NANOS_PER_SEC: u64 = 1_000_000_000; + + const INSTANT_ZERO: std::time::Instant = + unsafe { std::mem::transmute(std::time::UNIX_EPOCH) }; + + let raw_time = time.duration_since(INSTANT_ZERO); + + let sec = raw_time.as_secs(); + let nsec = raw_time.subsec_nanos(); + + sec * NANOS_PER_SEC + nsec as u64 +} + +/// Set SO_TXTIME socket option. +/// +/// This socket option is set to send to kernel the outgoing UDP +/// packet transmission time in the sendmsg syscall. 
+/// +/// Note that this socket option is set only on linux platforms. +#[cfg(target_os = "linux")] +pub fn set_txtime_sockopt(sock: &tokio::net::UdpSocket) -> io::Result<()> { + use nix::sys::socket::setsockopt; + use nix::sys::socket::sockopt::TxTime; + + let config = nix::libc::sock_txtime { + clockid: nix::libc::CLOCK_MONOTONIC, + flags: 0, + }; + + setsockopt(sock.as_raw_fd(), TxTime, &config)?; + + Ok(()) +} \ No newline at end of file diff --git a/pingora-core/src/protocols/l4/quic_internals/id_token.rs b/pingora-core/src/protocols/l4/quic_internals/id_token.rs new file mode 100644 index 000000000..0a95edad5 --- /dev/null +++ b/pingora-core/src/protocols/l4/quic_internals/id_token.rs @@ -0,0 +1,83 @@ +// Copyright (C) 2020, Cloudflare, Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +use std::net; + +/// Generate a stateless retry token. +/// +/// The token includes the static string `"quiche"` followed by the IP address +/// of the client and by the original destination connection ID generated by the +/// client. +/// +/// Note that this function is only an example and doesn't do any cryptographic +/// authenticate of the token. *It should not be used in production system*. +fn mint_token(hdr: &quiche::Header, src: &net::SocketAddr) -> Vec { + let mut token = Vec::new(); + + token.extend_from_slice(b"quiche"); + + let addr = match src.ip() { + std::net::IpAddr::V4(a) => a.octets().to_vec(), + std::net::IpAddr::V6(a) => a.octets().to_vec(), + }; + + token.extend_from_slice(&addr); + token.extend_from_slice(&hdr.dcid); + + token +} + +/// Validates a stateless retry token. +/// +/// This checks that the ticket includes the `"quiche"` static string, and that +/// the client IP address matches the address stored in the ticket. +/// +/// Note that this function is only an example and doesn't do any cryptographic +/// authenticate of the token. *It should not be used in production system*. 
+fn validate_token<'a>( + src: &net::SocketAddr, token: &'a [u8], +) -> Option> { + if token.len() < 6 { + return None; + } + + if &token[..6] != b"quiche" { + return None; + } + + let token = &token[6..]; + + let addr = match src.ip() { + std::net::IpAddr::V4(a) => a.octets().to_vec(), + std::net::IpAddr::V6(a) => a.octets().to_vec(), + }; + + if token.len() < addr.len() || &token[..addr.len()] != addr.as_slice() { + return None; + } + + Some(quiche::ConnectionId::from_ref(&token[addr.len()..])) +} \ No newline at end of file diff --git a/pingora-core/src/protocols/l4/quic_internals/mod.rs b/pingora-core/src/protocols/l4/quic_internals/mod.rs new file mode 100644 index 000000000..174b5db7c --- /dev/null +++ b/pingora-core/src/protocols/l4/quic_internals/mod.rs @@ -0,0 +1,2 @@ +pub(crate) mod sendto; +pub(crate) mod id_token; \ No newline at end of file From 7e5d62b4cccc0daa3424147f119a3cd337e6fcbc Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Fri, 3 Jan 2025 16:30:00 +0100 Subject: [PATCH 04/52] add initial Quic TLS handshake and connection state transition logic refactor quic.rs to into a module to enhance structure quic::Listener maintains a map with ConnectionIds => ConnectionHandles ConnectionHandles correspond to Connections, both having states like `Incoming` & `Established` the ConnectionHandles are used to forward UDP datagrams to the according quic::Connection while the connection is in the `Incoming` state the data is sent through mpcs::channels, once `Established` the data is directly received on the quiche::Connection possible enhancements: use type state pattern during quic::tls_handshake move tls_handshake to protocols::l4::tls::quic --- pingora-core/src/connectors/mod.rs | 2 + pingora-core/src/listeners/l4.rs | 4 +- pingora-core/src/listeners/mod.rs | 8 +- pingora-core/src/protocols/l4/quic.rs | 180 -------- .../l4/{quic_internals => quic}/id_token.rs | 14 +- pingora-core/src/protocols/l4/quic/mod.rs | 432 ++++++++++++++++++ 
pingora-core/src/protocols/l4/quic/sendto.rs | 13 +- .../src/protocols/l4/quic/settings.rs | 94 ++++ .../src/protocols/l4/quic/tls_handshake.rs | 280 ++++++++++++ .../src/protocols/l4/quic_internals/mod.rs | 2 - pingora-core/src/protocols/l4/stream.rs | 22 +- pingora-core/src/protocols/mod.rs | 8 +- 12 files changed, 844 insertions(+), 215 deletions(-) delete mode 100644 pingora-core/src/protocols/l4/quic.rs rename pingora-core/src/protocols/l4/{quic_internals => quic}/id_token.rs (86%) create mode 100644 pingora-core/src/protocols/l4/quic/mod.rs create mode 100644 pingora-core/src/protocols/l4/quic/settings.rs create mode 100644 pingora-core/src/protocols/l4/quic/tls_handshake.rs delete mode 100644 pingora-core/src/protocols/l4/quic_internals/mod.rs diff --git a/pingora-core/src/connectors/mod.rs b/pingora-core/src/connectors/mod.rs index 5a126cc70..dd18c217e 100644 --- a/pingora-core/src/connectors/mod.rs +++ b/pingora-core/src/connectors/mod.rs @@ -536,10 +536,12 @@ mod tests { assert!(!context.contains("total-connection timeout")); } + /* #[tokio::test] async fn test_do_connect_without_total_timeout() { let peer = BasicPeer::new(BLACK_HOLE); let (etype, context) = get_do_connect_failure_with_peer(&peer).await; assert!(etype != ConnectTimedout || !context.contains("total-connection timeout")); } + */ } diff --git a/pingora-core/src/listeners/l4.rs b/pingora-core/src/listeners/l4.rs index 2ce2fbb87..3b7ac9b97 100644 --- a/pingora-core/src/listeners/l4.rs +++ b/pingora-core/src/listeners/l4.rs @@ -234,7 +234,7 @@ fn from_raw_fd(address: &ServerAddress, fd: i32) -> Result { ServerProtocol::Quic => { let socket = UdpSocket::from_std(std_listener_socket) .or_err_with(BindError, || format!("Listen() failed on {address:?}"))?; - Ok(QuicListener::from(socket).into()) + Ok(QuicListener::try_from(socket)?.into()) } } } @@ -377,7 +377,7 @@ async fn bind(addr: &ServerAddress) -> Result { .or_err(BindError, "bind() failed")?; let tokio_socket = 
UdpSocket::try_from(std_socket) .or_err(BindError, "failed to create UdpSocket")?; - Ok(Listener::from(QuicListener::from(tokio_socket))) + Ok(Listener::from(QuicListener::try_from(tokio_socket)?)) } }, } diff --git a/pingora-core/src/listeners/mod.rs b/pingora-core/src/listeners/mod.rs index e26844774..3b70204b3 100644 --- a/pingora-core/src/listeners/mod.rs +++ b/pingora-core/src/listeners/mod.rs @@ -23,7 +23,7 @@ pub mod tls; pub use crate::tls::listeners as tls; use crate::protocols::{tls::TlsRef, ConnectionState, Stream}; -use crate::protocols::l4::quic::handshake as quic_handshake; +use crate::protocols::l4::quic::tls_handshake::handshake as quic_handshake; #[cfg(unix)] use crate::server::ListenFds; @@ -113,9 +113,9 @@ impl UninitializedStream { if let Some(tls) = self.tls { let tls_stream = tls.tls_handshake(self.l4).await?; Ok(Box::new(tls_stream)) - } else if let Some(state) = self.l4.quic_connection_state() { - quic_handshake(state)?; - Ok(Box::new(self.l4)) + } else if self.l4.is_quic_connection() { + let quic_stream = quic_handshake(self.l4).await?; + Ok(Box::new(quic_stream)) } else { Ok(Box::new(self.l4)) } diff --git a/pingora-core/src/protocols/l4/quic.rs b/pingora-core/src/protocols/l4/quic.rs deleted file mode 100644 index f3d3d601c..000000000 --- a/pingora-core/src/protocols/l4/quic.rs +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright 2024 Cloudflare, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! 
QUIC Listener using cloudflare/quiche - -use crate::protocols::l4::stream::Stream as L4Stream; -use crate::protocols::{ConnectionState as ConnectionStateTrait, QuicConnectionState}; -use parking_lot::Mutex; -use pingora_error::{Error, ErrorType, Result}; -use std::fmt::{Debug, Formatter}; -use std::io; -use std::net::SocketAddr; -use std::os::fd::{AsRawFd, RawFd}; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; -use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; -use tokio::net::UdpSocket; - -pub struct Listener { - socket: Arc, -} - -impl From for Listener { - fn from(io: UdpSocket) -> Self { - Listener { - socket: Arc::new(io), - } - } -} - -impl Listener { - pub(crate) async fn accept(&self) -> std::io::Result<(L4Stream, SocketAddr)> { - // TODO: SocketAddr should be remote addr - let addr = self.socket.local_addr()?; - - Ok(( - QuicConnection { - socket: self.socket.clone(), - state: Arc::new(Mutex::new(QuicConnectionState::Incoming( - PreHandshakeState { - socket: self.socket.clone(), - }, - ))), - } - .into(), - addr, - )) - } - - pub(super) fn get_raw_fd(&self) -> RawFd { - self.socket.as_raw_fd() - } -} - -impl AsRawFd for QuicConnection { - fn as_raw_fd(&self) -> RawFd { - self.socket.as_raw_fd() - } -} - -impl Debug for Listener { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Listener") - .field("io", &self.socket) - .finish() - } -} - -pub(crate) struct QuicConnection { - socket: Arc, - state: Arc>, -} - -impl QuicConnection { - pub(crate) fn local_addr(&self) -> io::Result { - self.socket.local_addr() - } -} - -impl Debug for QuicConnection { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("QuicConnection").finish() - } -} - -#[allow(unused_variables)] // TODO: remove -impl AsyncWrite for QuicConnection { - fn poll_write( - self: Pin<&mut Self>, - cx: &mut Context<'_>, - buf: &[u8], - ) -> Poll> { - todo!() - } - - fn poll_flush(self: Pin<&mut Self>, cx: &mut 
Context<'_>) -> Poll> { - todo!() - } - - fn poll_shutdown( - self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll> { - todo!() - } -} - -#[allow(unused_variables)] // TODO: remove -impl AsyncRead for QuicConnection { - fn poll_read( - self: Pin<&mut Self>, - cx: &mut Context<'_>, - buf: &mut ReadBuf<'_>, - ) -> Poll> { - todo!() - } -} - -#[allow(unused)] // TODO: remove -pub enum ConnectionState { - Incoming(PreHandshakeState), - Established(EstablishedState), -} - -#[allow(unused)] // TODO: remove -pub struct PreHandshakeState { - socket: Arc, -} - -#[allow(unused)] // TODO: remove -pub struct EstablishedState { - socket: Arc, -} - -impl ConnectionStateTrait for QuicConnection { - fn quic_connection_state(&self) -> Option>> { - Some(self.state.clone()) - } -} - -impl Into> for ConnectionState { - fn into(self) -> Option { - match self { - ConnectionState::Incoming(s) => Some(s), - ConnectionState::Established(_) => None, - } - } -} - -impl Into> for ConnectionState { - fn into(self) -> Option { - match self { - ConnectionState::Incoming(_) => None, - ConnectionState::Established(s) => Some(s), - } - } -} - -pub(crate) fn handshake(state: Arc>) -> Result<()> { - let state = state.lock(); - - let Some(_s) = state.into() else { - debug_assert!(false, "quic::handshake on already established connection"); - return Err(Error::new(ErrorType::HandshakeError)); - }; - - Ok(()) -} diff --git a/pingora-core/src/protocols/l4/quic_internals/id_token.rs b/pingora-core/src/protocols/l4/quic/id_token.rs similarity index 86% rename from pingora-core/src/protocols/l4/quic_internals/id_token.rs rename to pingora-core/src/protocols/l4/quic/id_token.rs index 0a95edad5..6fec23f68 100644 --- a/pingora-core/src/protocols/l4/quic_internals/id_token.rs +++ b/pingora-core/src/protocols/l4/quic/id_token.rs @@ -34,14 +34,15 @@ use std::net; /// /// Note that this function is only an example and doesn't do any cryptographic /// authenticate of the token. 
*It should not be used in production system*. -fn mint_token(hdr: &quiche::Header, src: &net::SocketAddr) -> Vec { +pub(super) fn mint_token(hdr: &quiche::Header, src: &net::SocketAddr) -> Vec { + // TODO: implement token generation/validation using crypto let mut token = Vec::new(); token.extend_from_slice(b"quiche"); let addr = match src.ip() { - std::net::IpAddr::V4(a) => a.octets().to_vec(), - std::net::IpAddr::V6(a) => a.octets().to_vec(), + net::IpAddr::V4(a) => a.octets().to_vec(), + net::IpAddr::V6(a) => a.octets().to_vec(), }; token.extend_from_slice(&addr); @@ -57,9 +58,10 @@ fn mint_token(hdr: &quiche::Header, src: &net::SocketAddr) -> Vec { /// /// Note that this function is only an example and doesn't do any cryptographic /// authenticate of the token. *It should not be used in production system*. -fn validate_token<'a>( +pub(super) fn validate_token<'a>( src: &net::SocketAddr, token: &'a [u8], ) -> Option> { + // TODO: implement token generation/validation using crypto if token.len() < 6 { return None; } @@ -71,8 +73,8 @@ fn validate_token<'a>( let token = &token[6..]; let addr = match src.ip() { - std::net::IpAddr::V4(a) => a.octets().to_vec(), - std::net::IpAddr::V6(a) => a.octets().to_vec(), + net::IpAddr::V4(a) => a.octets().to_vec(), + net::IpAddr::V6(a) => a.octets().to_vec(), }; if token.len() < addr.len() || &token[..addr.len()] != addr.as_slice() { diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs new file mode 100644 index 000000000..88ec75b73 --- /dev/null +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -0,0 +1,432 @@ +use std::collections::HashMap; +use std::{io, mem}; +use std::fmt::{Debug, Formatter}; +use std::net::SocketAddr; +use std::os::fd::{AsRawFd, RawFd}; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll, Waker}; +use log::{debug, error, trace, warn}; +use parking_lot::Mutex; +use quiche::{Config, ConnectionId, Header, RecvInfo, Type}; +use ring::hmac::Key; 
+use ring::rand::SystemRandom; +use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; +use tokio::net::UdpSocket; +use tokio::sync::mpsc::{channel, Receiver, Sender}; +use tokio::sync::mpsc::error::TryRecvError; +use tokio::sync::Notify; +use pingora_error::{BError, Error, ErrorType}; +use quiche::Connection as QuicheConnection; +use settings::Settings as QuicSettings; + +#[allow(unused)] // TODO: remove +mod sendto; +mod id_token; +pub(crate) mod tls_handshake; +mod settings; + +use crate::protocols::ConnectionState; +use crate::protocols::l4::stream::Stream as L4Stream; + +// UDP header 8 bytes, IPv4 Header 20 bytes +//pub const MAX_IPV4_BUF_SIZE: usize = 65507; +// UDP header 8 bytes, IPv6 Header 40 bytes +pub const MAX_IPV6_BUF_SIZE: usize = 65487; + +// 1500(Ethernet) - 20(IPv4 header) - 8(UDP header) = 1472. +//pub const MAX_IPV4_UDP_PACKET_SIZE: usize = 1472; +// 1500(Ethernet) - 40(IPv6 header) - 8(UDP header) = 1452 +pub const MAX_IPV6_UDP_PACKET_SIZE: usize = 1452; + +//pub const MAX_IPV4_QUIC_DATAGRAM_SIZE: usize = 1370; +pub const MAX_IPV6_QUIC_DATAGRAM_SIZE: usize = 1350; + +const HANDSHAKE_PACKET_BUFFER_SIZE: usize = 64; + +pub struct Listener { + socket: Arc, + socket_addr: SocketAddr, + + config: Arc>, + crypto: Crypto, + + connections: Mutex, ConnectionHandle>>, +} + +pub struct Crypto { + key: Key, +} + +pub enum Connection { + Incoming(IncomingState), + Established(EstablishedState), +} + +pub struct IncomingState { + id: ConnectionId<'static>, + config: Arc>, + + socket: Arc, + udp_rx: Receiver, + response_tx: Sender, + + dgram: UdpRecv, + + ignore: bool, + reject: bool +} + +pub struct EstablishedState { + socket: Arc, + connection: Arc>, + tx_notify: Arc, + rx_waker: Arc>> +} + +pub enum ConnectionHandle { + Incoming(IncomingHandle), + Established(EstablishedHandle), +} + +impl Debug for ConnectionHandle { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str("ConnectionHandle")?; + match self { + ConnectionHandle::Incoming(_) 
=> f.write_str("::Incoming"), + ConnectionHandle::Established(_) => f.write_str("::Established"), + } + } +} + +pub struct IncomingHandle { + udp_tx: Sender, + response_rx: Receiver, +} + +pub(crate) enum HandshakeResponse { + Established(EstablishedHandle), + Ignored, + Rejected, + // TODO: TimedOut, +} + +#[derive(Clone)] +pub struct EstablishedHandle { + connection: Arc>, + rx_waker: Arc>>, + tx_notify: Arc, +} + +pub struct UdpRecv { + pub(crate) pkt: Vec, + pub(crate) header: Header<'static>, + pub(crate) recv_info: RecvInfo, +} + +impl TryFrom for Listener { + type Error = BError; + + fn try_from(io: UdpSocket) -> pingora_error::Result { + let addr = io.local_addr() + .map_err(|e| Error::explain( + ErrorType::SocketError, + format!("failed to get local address from socket: {}", e)))?; + let rng = SystemRandom::new(); + let key = Key::generate(ring::hmac::HMAC_SHA256, &rng) + .map_err(|e| Error::explain( + ErrorType::InternalError, + format!("failed to generate listener key: {}", e)))?; + + let settings = QuicSettings::try_default()?; + + Ok(Listener { + socket: Arc::new(io), + socket_addr: addr, + + config: settings.get_config(), + crypto: Crypto { + key + }, + + connections: Default::default(), + }) + } +} + +impl Listener { + pub(crate) async fn accept(&self) -> io::Result<(L4Stream, SocketAddr)> { + let mut rx_buf = [0u8; MAX_IPV6_BUF_SIZE]; + + trace!("endpoint rx loop"); + 'read: loop { + // receive from network and parse Quic header + let (size, from) = self.socket.recv_from(&mut rx_buf).await?; + + // parse the Quic packet's header + let header = match Header::from_slice(rx_buf[..size].as_mut(), quiche::MAX_CONN_ID_LEN) { + Ok(hdr) => hdr, + Err(e) => { + warn!("Parsing Quic packet header failed with error: {:?}.", e); + trace!("Dropped packet due to invalid header. 
Continuing..."); + continue 'read; + } + }; + trace!("dgram received from={} length={}", from, size); + + // TODO: allow for connection id updates during lifetime + // connection needs to be able to update source_ids() or destination_ids() + + let recv_info = RecvInfo { + to: self.socket_addr, + from, + }; + + let mut conn_id = header.dcid.clone(); + let mut udp_tx = None; + { + let mut connections = self.connections.lock(); + // send to corresponding connection + let mut handle; + handle = connections.get_mut(&conn_id); + if handle.is_none() { + conn_id = Self::gen_cid(&self.crypto.key, &header); + handle = connections.get_mut(&conn_id); + }; + if let Some(handle) = handle { + trace!("existing connection {:?} {:?}", conn_id, handle); + match handle { + ConnectionHandle::Incoming(i) => { + match i.response_rx.try_recv() { + Ok(msg) => { + match msg { + HandshakeResponse::Established(e) => { + // receive data into existing connection + Self::recv_connection(e.connection.as_ref(), &mut rx_buf[..size], recv_info)?; + // transition connection + handle.establish(e) + } + HandshakeResponse::Ignored + | HandshakeResponse::Rejected => { + connections.remove(&header.dcid); + continue 'read + } + } + } + Err(e) => { + match e { + TryRecvError::Empty => { + udp_tx = Some(i.udp_tx.clone()); + } + TryRecvError::Disconnected => { + warn!("dropping connection {:?} handshake response channel receiver disconnected.", &header.dcid); + connections.remove(&header.dcid); + } + }; + } + } + } + ConnectionHandle::Established(e) => { + // receive data into existing connection + match Self::recv_connection(e.connection.as_ref(), &mut rx_buf[..size], recv_info) { + Ok(_len) => { + e.tx_notify.notify_one(); + + let mut rx_waker = e.rx_waker.lock(); + if let Some(waker) = rx_waker.take() { + waker.wake_by_ref(); + } + } + Err(e) => { + // TODO: take action on errors, e.g close connection, send & remove + break 'read Err(e); + } + } + } + } + } + }; + if let Some(udp_tx) = udp_tx { + // 
receive data on UDP channel + match udp_tx.send(UdpRecv { + pkt: rx_buf[..size].to_vec(), + header, + recv_info, + }).await { + Ok(()) => {}, + Err(e) => warn!("sending dgram to connection {:?} failed with error: {}", conn_id, e) + } + continue 'read; + } + + + if header.ty != Type::Initial { + debug!("Quic packet type is not \"Initial\". Header: {:?}. Continuing...", header); + continue 'read; + } + + // create incoming connection & handle + let (udp_tx, udp_rx) = channel::(HANDSHAKE_PACKET_BUFFER_SIZE); + let (response_tx, response_rx) = channel::(1); + + trace!("new incoming connection {:?}", conn_id); + let connection = Connection::Incoming(IncomingState { + id: conn_id.clone(), + config: self.config.clone(), + + socket: self.socket.clone(), + udp_rx, + response_tx, + + dgram: UdpRecv { + pkt: rx_buf[..size].to_vec(), + header, + recv_info, + }, + + ignore: false, + reject: false, + }); + let handle = ConnectionHandle::Incoming(IncomingHandle { + udp_tx, + response_rx, + }); + + { + let mut connections = self.connections.lock(); + connections.insert(conn_id, handle); + } + + return Ok((connection.into(), from)) + } + } + + fn recv_connection(conn: &Mutex, mut rx_buf: &mut [u8], recv_info: RecvInfo) -> io::Result { + let size = rx_buf.len(); + let mut conn = conn.lock(); + match conn.recv(&mut rx_buf, recv_info) { + Ok(len) => { + debug!("connection received: length={}", len); + debug_assert_eq!(size, len, "size received on connection not equal to len received from network."); + Ok(len) + } + Err(e) => { + error!("connection receive error: {:?}", e); + Err(io::Error::new( + io::ErrorKind::BrokenPipe, + format!("Connection could not receive network data for {:?}. 
{:?}", + conn.destination_id(), e))) + } + } + } + + fn gen_cid(key: &Key, hdr: &Header) -> ConnectionId<'static> { + let conn_id = ring::hmac::sign(key, &hdr.dcid); + let conn_id = conn_id.as_ref()[..quiche::MAX_CONN_ID_LEN].to_vec(); + let conn_id = ConnectionId::from(conn_id); + trace!("generated connection id {:?}", conn_id); + conn_id + } + + pub(super) fn get_raw_fd(&self) -> RawFd { + self.socket.as_raw_fd() + } +} + +impl ConnectionHandle { + fn establish(&mut self, handle: EstablishedHandle) { + match self { + ConnectionHandle::Incoming(_) => { + let _ = mem::replace(self, ConnectionHandle::Established(handle)); + } + ConnectionHandle::Established(_) => {} + } + } +} + +impl Connection { + fn establish(&mut self, state: EstablishedState) { + match self { + Connection::Incoming(_) => { + let _ = mem::replace(self, Connection::Established(state)); + } + Connection::Established(_) => {} + } + } +} + +impl AsRawFd for Connection { + fn as_raw_fd(&self) -> RawFd { + match self { + Connection::Incoming(s) => s.socket.as_raw_fd(), + Connection::Established(s) => s.socket.as_raw_fd() + } + } +} + +impl Debug for Listener { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Listener") + .field("io", &self.socket) + .finish() + } +} + + +impl Connection { + pub(crate) fn local_addr(&self) -> io::Result { + match self { + Connection::Incoming(s) => s.socket.local_addr(), + Connection::Established(s) => s.socket.local_addr() + } + } +} + +impl Debug for Connection { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("QuicConnection").finish() + } +} + +#[allow(unused_variables)] // TODO: remove +impl AsyncWrite for Connection { + fn poll_write( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + todo!() + } + + fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + todo!() + } + + fn poll_shutdown( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + todo!() + 
} +} + +#[allow(unused_variables)] // TODO: remove +impl AsyncRead for Connection { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + todo!() + } +} + +impl ConnectionState for Connection { + fn quic_connection_state(&mut self) -> Option<&mut Connection> { + Some(self) + } + + fn is_quic_connection(&self) -> bool { + true + } +} diff --git a/pingora-core/src/protocols/l4/quic/sendto.rs b/pingora-core/src/protocols/l4/quic/sendto.rs index f2a7d1aa2..15936721d 100644 --- a/pingora-core/src/protocols/l4/quic/sendto.rs +++ b/pingora-core/src/protocols/l4/quic/sendto.rs @@ -99,14 +99,7 @@ pub async fn send_to( segment_size: usize, pacing: bool, enable_gso: bool, ) -> io::Result { if pacing && enable_gso { - match send_to_gso_pacing(socket, buf, send_info, segment_size) { - Ok(v) => { - return Ok(v); - }, - Err(e) => { - return Err(e); - }, - } + return send_to_gso_pacing(socket, buf, send_info, segment_size) } let mut off = 0; @@ -156,8 +149,8 @@ pub fn set_txtime_sockopt(sock: &tokio::net::UdpSocket) -> io::Result<()> { use nix::sys::socket::setsockopt; use nix::sys::socket::sockopt::TxTime; - let config = nix::libc::sock_txtime { - clockid: nix::libc::CLOCK_MONOTONIC, + let config = libc::sock_txtime { + clockid: libc::CLOCK_MONOTONIC, flags: 0, }; diff --git a/pingora-core/src/protocols/l4/quic/settings.rs b/pingora-core/src/protocols/l4/quic/settings.rs new file mode 100644 index 000000000..b51091d89 --- /dev/null +++ b/pingora-core/src/protocols/l4/quic/settings.rs @@ -0,0 +1,94 @@ +use std::sync::Arc; +use parking_lot::Mutex; +use quiche::Config; +use pingora_error::{ErrorType, OrErr, Result}; +use crate::protocols::l4::quic::MAX_IPV6_QUIC_DATAGRAM_SIZE; + +pub struct Settings { + config: Arc>, +} + +impl Settings { + pub(crate) fn try_default() -> Result { + // TODO: use pingora config values where possible + // enable user provided default config + + let mut config = Config::new(quiche::PROTOCOL_VERSION) + 
.explain_err(ErrorType::InternalError, |_| { + "Failed to create quiche config." + })?; + + config + .load_cert_chain_from_pem_file("/home/hargut/Sources/github.com/pingora/pingora-proxy/tests/utils/conf/keys/server_rustls.crt") + .explain_err(ErrorType::FileReadError, |_| "Could not load certificate chain from pem file.")?; + + config + .load_priv_key_from_pem_file("/home/hargut/Sources/github.com/pingora/pingora-proxy/tests/utils/conf/keys/key.pem") + .explain_err(ErrorType::FileReadError, |_| "Could not load private key from pem file.")?; + + // config.load_verify_locations_from_file() for CA's + // config.verify_peer(); default server = false; client = true + // config.discover_pmtu(false); // default false + config.grease(false); // default true + // config.log_keys() && config.set_keylog(); // logging SSL secrets + // config.set_ticket_key() // session ticket signer key material + + config.enable_early_data(); + + config + .set_application_protos(quiche::h3::APPLICATION_PROTOCOL) + .explain_err(ErrorType::InternalError, |_| { + "Failed to set application protocols." 
+ })?; + + // config.set_application_protos_wire_format(); + // config.set_max_amplification_factor(3); // anti-amplification limit factor; default 3 + + config.set_max_idle_timeout(60 * 1000); // default ulimited + config.set_max_recv_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // recv default is 65527 + config.set_max_send_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // send default is 1200 + config.set_initial_max_data(10_000_000); // 10 Mb + config.set_initial_max_stream_data_bidi_local(1_000_000); // 1 Mb + config.set_initial_max_stream_data_bidi_remote(1_000_000); // 1 Mb + config.set_initial_max_stream_data_uni(1_000_000); // 1 Mb + config.set_initial_max_streams_bidi(100); + config.set_initial_max_streams_uni(100); + + // config.set_ack_delay_exponent(3); // default 3 + // config.set_max_ack_delay(25); // default 25 + // config.set_active_connection_id_limit(2); // default 2 + // config.set_disable_active_migration(false); // default false + + // config.set_active_connection_id_limit(2); // default 2 + // config.set_disable_active_migration(false); // default false + // config.set_cc_algorithm_name("cubic"); // default cubic + // config.set_initial_congestion_window_packets(10); // default 10 + // config.set_cc_algorithm(CongestionControlAlgorithm::CUBIC); // default CongestionControlAlgorithm::CUBIC + + // config.enable_hystart(true); // default true + // config.enable_pacing(true); // default true + // config.set_max_pacing_rate(); // default ulimited + + //config.enable_dgram(false); // default false + + // config.set_path_challenge_recv_max_queue_len(3); // default 3 + // config.set_max_connection_window(MAX_CONNECTION_WINDOW); // default 24 Mb + // config.set_max_stream_window(MAX_STREAM_WINDOW); // default 16 Mb + // config.set_stateless_reset_token(None) // default None + // config.set_disable_dcid_reuse(false) // default false + + Ok(Self { + config: Arc::new(Mutex::new(config)) + }) + } + + pub(crate) fn get_config(&self) -> Arc> { + 
self.config.clone() + } +} + +impl From for Settings { + fn from(config: Config) -> Self { + Self { config: Arc::new(Mutex::new(config)) } + } +} \ No newline at end of file diff --git a/pingora-core/src/protocols/l4/quic/tls_handshake.rs b/pingora-core/src/protocols/l4/quic/tls_handshake.rs new file mode 100644 index 000000000..cf63e7d38 --- /dev/null +++ b/pingora-core/src/protocols/l4/quic/tls_handshake.rs @@ -0,0 +1,280 @@ +use std::net::SocketAddr; +use std::sync::Arc; +use log::{debug, error, trace, warn}; +use parking_lot::Mutex; +use tokio::net::UdpSocket; +use tokio::sync::Notify; +use pingora_error::{Error, ErrorType, OrErr}; +use crate::protocols::ConnectionState; +use crate::protocols::l4::quic::{Connection, EstablishedHandle, EstablishedState, HandshakeResponse, IncomingState, MAX_IPV6_UDP_PACKET_SIZE}; +use crate::protocols::l4::quic::id_token::{mint_token, validate_token}; +use crate::protocols::l4::quic::sendto::set_txtime_sockopt; +use crate::protocols::l4::stream::Stream as L4Stream; + +pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result { + let Some(connection) = stream.quic_connection_state() else { + debug_assert!(false, "quic::handshake called on stream of another type"); + return Err(Error::explain(ErrorType::InternalError, "stream is not a quic stream")) + }; + + let e_state = match connection { + Connection::Incoming(s) => { + if let Some((e_state, e_handle)) = handshake_inner(s).await? 
{ + s.response_tx.send(HandshakeResponse::Established(e_handle)).await + .explain_err(ErrorType::WriteError, + |e| format!("Sending HandshakeResponse failed with {}", e))?; + Some(e_state) + } else { + debug!("handshake either rejected or ignored for connection {:?}", s.id); + None + } + } + Connection::Established(_) => { + debug_assert!(false, "quic::handshake on already established connection"); + return Err(Error::explain(ErrorType::HandshakeError, "handshake state not of type incoming")) + } + }; + + if let Some(e_state) = e_state { + connection.establish(e_state); + Ok(stream) + } else { + Err(Error::explain(ErrorType::HandshakeError, "handshake rejected or ignored")) + } +} + +async fn handshake_inner(state: &mut IncomingState) -> pingora_error::Result> { + let IncomingState { + id, + config, + + socket, + udp_rx, + dgram, + + response_tx, + + ignore, + reject + } = state; + + if *ignore { + if let Err(_) = response_tx.send(HandshakeResponse::Ignored).await { + trace!("failed sending endpoint response for incoming connection id={:?}.", id) + }; + return Ok(None); + } else if *reject { + if let Err(_) = response_tx.send(HandshakeResponse::Rejected).await { + trace!("failed sending endpoint response for incoming connection id={:?}.", id) + }; + return Ok(None); + // TODO: send to peer, return err if send fails + } + + let initial_dcid = dgram.header.dcid.clone(); + + // TODO: use correct buf sizes for IPv4 & IPv6 + // for now use IPv6 values as they are smaller, should work as well on IPv4 + let mut out = [0u8; MAX_IPV6_UDP_PACKET_SIZE]; + + if !quiche::version_is_supported(dgram.header.version) { + warn!("QUIC packet version received is not supported. Negotiating version..."); + let size = quiche::negotiate_version(&dgram.header.scid, &dgram.header.dcid, &mut out) + .map_err(|e| Error::explain( + ErrorType::HandshakeError, + format!("Creating version negotiation packet failed. 
Error: {:?}", e)))?; + + // send data to network + send_dgram(&socket, &out[..size], dgram.recv_info.from).await + .map_err(|e| Error::explain( + ErrorType::WriteError, + format!("Sending version negotiation packet failed. Error: {:?}", e)))?; + + // validate response + if let Some(resp_dgram) = udp_rx.recv().await { + if quiche::version_is_supported(resp_dgram.header.version) { + *dgram = resp_dgram + } else { + return Err(Error::explain( + ErrorType::HandshakeError, + "Version negotiation failed responded version is not supported.".to_string())); + }; + } else { + return Err(Error::explain( + ErrorType::HandshakeError, + "Version negotiation did not receive a response".to_string())); + } + }; + + // token is always present in "Initial" packets + let token = dgram.header.token.as_ref().unwrap(); + // do stateless retry if the client didn't send a token + if token.is_empty() { + debug!("stateless retry as Quic header token is empty"); + + let hdr = &dgram.header; + let new_token = mint_token(&hdr, &dgram.recv_info.from); + let size = quiche::retry( + &hdr.scid, + &hdr.dcid, + &id, + &new_token, + hdr.version, + &mut out, + ).map_err(|e| Error::explain( + ErrorType::HandshakeError, + format!("Creating retry packet failed. Error: {:?}", e)))?; + + send_dgram(&socket, &out[..size], dgram.recv_info.from).await + .map_err(|e| Error::explain( + ErrorType::WriteError, + format!("Sending retry packet failed. Error: {:?}", e)))?; + + // validate response + if let Some(resp_dgram) = udp_rx.recv().await { + // token is always present in "Initial" packets + let resp_token = resp_dgram.header.token.as_ref().unwrap(); + if resp_token.is_empty() { + return Err(Error::explain( + ErrorType::HandshakeError, + "Stateless retry failed. 
Still no token available after stateless retry.".to_string())); + } else { + *dgram = resp_dgram; + }; + } else { + return Err(Error::explain( + ErrorType::HandshakeError, + "Stateless retry did not receive a response.".to_string())); + } + } + + let hdr = &dgram.header; + let token = hdr.token.as_ref().unwrap(); + let odcid = validate_token(&dgram.recv_info.from, token); + + // The token was not valid, meaning the retry failed, so drop the connection. + if odcid.is_none() { + return Err(Error::explain( + ErrorType::HandshakeError, + "Quic header has invalid address validation token.".to_string())); + } + + // The destination id was not valid, so drop the connection. + if id.len() != hdr.dcid.len() { + return Err(Error::explain( + ErrorType::HandshakeError, + "Quic header has invalid destination connection id.".to_string())); + } + + // Reuse the source connection ID we sent in the Retry packet, + // instead of changing it again. + trace!("new Quic connection odcid={:?} dcid={:?} scid={:?} ", initial_dcid, hdr.dcid, hdr.scid); + + let mut conn; + { + let mut config = config.lock(); + conn = quiche::accept(&hdr.dcid, Some(&initial_dcid), dgram.recv_info.to, dgram.recv_info.from, &mut config) + .map_err(|e| Error::explain( + ErrorType::HandshakeError, + format!("Connection instantiation failed. Error: {:?}", e)))?; + } + + // receive quic data into connection + let buf = dgram.pkt.as_mut_slice(); + conn.recv(buf, dgram.recv_info) + .map_err(|e| Error::explain( + ErrorType::HandshakeError, + format!("Recieving initial data failed. 
Error: {:?}", e)))?; + + trace!("starting handshake for connection {:?}", id); + // RSA handshake requires more than one packet + while !conn.is_established() { + trace!("creating handshake packet"); + let (size, info) = conn.send(out.as_mut_slice()) + .map_err(|e| Error::explain( + ErrorType::WriteError, + format!("creating handshake packet failed with {:?}", e)))?; + + trace!("sending handshake packet"); + send_dgram(&socket, &out[..size], info.to).await + .map_err(|e| Error::explain( + ErrorType::WriteError, + format!("sending handshake packet failed with {:?}", e)))?; + + trace!("waiting for handshake response"); + if let Some(mut dgram) = udp_rx.recv().await { + trace!("received handshake response"); + let buf = dgram.pkt.as_mut_slice(); + conn.recv(buf, dgram.recv_info) + .map_err(|e| Error::explain( + ErrorType::HandshakeError, + format!("receiving handshake response failed with {:?}", e)))?; + } else { + return Err(Error::explain( + ErrorType::HandshakeError, + "finishing handshake failed, did not receive a response")); + } + + trace!("connection established={}, early_data={}, closed={}, draining={}, readable={}, timed_out={}, resumed={}", + conn.is_established(), conn.is_in_early_data(), conn.is_closed(), + conn.is_draining(), conn.is_readable(), conn.is_timed_out(), conn.is_resumed()); + + trace!("connection peer_error={:?}, local_error={:?}", conn.peer_error(), conn.local_error()); + match conn.peer_error() { + None => {} + Some(e) => { + error!("{}", String::from_utf8_lossy(e.reason.as_slice()).to_string()) + } + } + match conn.local_error() { + None => {} + Some(e) => { + error!("{}", String::from_utf8_lossy(e.reason.as_slice()).to_string()) + } + } + } + trace!("handshake successful for connection {:?}", id); + + let _max_send_udp_payload_size = conn.max_send_udp_payload_size(); + let _pacing_enabled = match set_txtime_sockopt(&*socket) { + Ok(_) => { + debug!("successfully set SO_TXTIME socket option"); + true + }, + Err(e) => { + debug!("setsockopt 
failed {:?}", e); + false + }, + }; + + let state = EstablishedState { + socket: socket.clone(), + connection: Arc::new(Mutex::new(conn)), + tx_notify: Arc::new(Notify::new()), + rx_waker: Arc::new(Mutex::new(None)), + }; + let handle = EstablishedHandle { + connection: state.connection.clone(), + rx_waker: state.rx_waker.clone(), + tx_notify: state.tx_notify.clone() + }; + + Ok(Some((state, handle))) +} + + +// connection io tx directly via socket +async fn send_dgram(io: &Arc, buf: &[u8], to: SocketAddr) -> pingora_error::Result { + match io.send_to(buf, &to).await { + Ok(sent) => { + debug_assert_eq!(sent, buf.len(), "amount of network sent data does not correspond to packet size"); + trace!("sent dgram to={:?} length={:?} ", to, buf.len()); + Ok(sent) + } + Err(e) => { + error!("Failed sending packet via UDP. Error: {:?}", e); + Err(Error::explain( + ErrorType::WriteError, format!("Failed sending packet via UDP. Error: {:?}", e))) + } + } +} \ No newline at end of file diff --git a/pingora-core/src/protocols/l4/quic_internals/mod.rs b/pingora-core/src/protocols/l4/quic_internals/mod.rs deleted file mode 100644 index 174b5db7c..000000000 --- a/pingora-core/src/protocols/l4/quic_internals/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub(crate) mod sendto; -pub(crate) mod id_token; \ No newline at end of file diff --git a/pingora-core/src/protocols/l4/stream.rs b/pingora-core/src/protocols/l4/stream.rs index 3f1fd47a1..6ceb2ce0e 100644 --- a/pingora-core/src/protocols/l4/stream.rs +++ b/pingora-core/src/protocols/l4/stream.rs @@ -18,7 +18,6 @@ use async_trait::async_trait; use futures::FutureExt; use log::{debug, error}; -use parking_lot::Mutex; use pingora_error::{ErrorType::*, OrErr, Result}; #[cfg(target_os = "linux")] use std::io::IoSliceMut; @@ -38,17 +37,17 @@ use tokio::net::TcpStream; use tokio::net::UnixStream; use crate::protocols::l4::ext::{set_tcp_keepalive, TcpKeepalive}; -use crate::protocols::l4::quic::QuicConnection; +use 
crate::protocols::l4::quic::Connection; use crate::protocols::raw_connect::ProxyDigest; use crate::protocols::{ - ConnectionState, GetProxyDigest, GetSocketDigest, GetTimingDigest, Peek, QuicConnectionState, + ConnectionState, GetProxyDigest, GetSocketDigest, GetTimingDigest, Peek, Shutdown, SocketDigest, Ssl, TimingDigest, UniqueID, UniqueIDType, }; use crate::upstreams::peer::Tracer; #[derive(Debug)] enum RawStream { - Quic(QuicConnection), + Quic(Connection), Tcp(TcpStream), #[cfg(unix)] Unix(UnixStream), @@ -430,8 +429,8 @@ impl Stream { } } -impl From for Stream { - fn from(s: QuicConnection) -> Self { +impl From for Stream { + fn from(s: Connection) -> Self { Stream { stream: BufStream::with_capacity( BUF_READ_SIZE, @@ -523,13 +522,20 @@ impl UniqueID for Stream { } impl ConnectionState for Stream { - fn quic_connection_state(&self) -> Option>> { - match &self.stream.get_ref().stream { + fn quic_connection_state(&mut self) -> Option<&mut Connection> { + match &mut self.stream.get_mut().stream { RawStream::Quic(s) => s.quic_connection_state(), RawStream::Tcp(_) => None, RawStream::Unix(_) => None, } } + fn is_quic_connection(&self) -> bool { + match &self.stream.get_ref().stream { + RawStream::Quic(s) => s.is_quic_connection(), + RawStream::Tcp(_) => false, + RawStream::Unix(_) => false + } + } } impl Ssl for Stream {} diff --git a/pingora-core/src/protocols/mod.rs b/pingora-core/src/protocols/mod.rs index 8c9a995f4..046412ae8 100644 --- a/pingora-core/src/protocols/mod.rs +++ b/pingora-core/src/protocols/mod.rs @@ -30,7 +30,6 @@ pub use l4::ext::TcpKeepalive; pub use tls::ALPN; use async_trait::async_trait; -use l4::quic::ConnectionState as QuicConnectionState; use std::fmt::Debug; use std::net::{IpAddr, Ipv4Addr}; use std::sync::Arc; @@ -55,9 +54,12 @@ pub trait UniqueID { /// Interface to get the raw connection for e.g. 
non-connection based network protocols like UDP/QUIC pub trait ConnectionState { - fn quic_connection_state(&self) -> Option>> { + fn quic_connection_state(&mut self) -> Option<&mut Connection> { None } + fn is_quic_connection(&self) -> bool { + false + } } /// Interface to get TLS info @@ -255,12 +257,12 @@ use l4::socket::SocketAddr; use log::{debug, error}; #[cfg(unix)] use nix::sys::socket::{getpeername, SockaddrStorage, UnixAddr}; -use parking_lot::Mutex; #[cfg(unix)] use std::os::unix::prelude::AsRawFd; #[cfg(windows)] use std::os::windows::io::AsRawSocket; use std::{net::SocketAddr as InetSocketAddr, path::Path}; +use crate::protocols::l4::quic::Connection; #[cfg(unix)] impl ConnFdReusable for SocketAddr { From 79124805f34c4341cafd21095ace25b1483d99c1 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Sat, 4 Jan 2025 12:13:28 +0100 Subject: [PATCH 05/52] import helpers from cloudflare/quiche@0570ab83 sources cloudflare/quiche@0570ab83/quiche/src/stream/mod.rs --- pingora-core/src/protocols/http/v3/nohash.rs | 60 ++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 pingora-core/src/protocols/http/v3/nohash.rs diff --git a/pingora-core/src/protocols/http/v3/nohash.rs b/pingora-core/src/protocols/http/v3/nohash.rs new file mode 100644 index 000000000..357c0575d --- /dev/null +++ b/pingora-core/src/protocols/http/v3/nohash.rs @@ -0,0 +1,60 @@ +// Copyright (C) 2018-2019, Cloudflare, Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +use std::collections::{HashMap, HashSet}; + +/// A simple no-op hasher for Stream IDs. +/// +/// The QUIC protocol and quiche library guarantees stream ID uniqueness, so +/// we can save effort by avoiding using a more complicated algorithm. +#[derive(Default)] +pub struct StreamIdHasher { + id: u64, +} + +impl std::hash::Hasher for StreamIdHasher { + #[inline] + fn finish(&self) -> u64 { + self.id + } + + #[inline] + fn write_u64(&mut self, id: u64) { + self.id = id; + } + + #[inline] + fn write(&mut self, _: &[u8]) { + // We need a default write() for the trait but stream IDs will always + // be a u64 so we just delegate to write_u64. 
+ unimplemented!() + } +} + +type BuildStreamIdHasher = std::hash::BuildHasherDefault; + +pub type StreamIdHashMap = HashMap; +pub type StreamIdHashSet = HashSet; \ No newline at end of file From af61e2fe9a23dad162207a3124c449e6933e16a6 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Sat, 4 Jan 2025 12:23:40 +0100 Subject: [PATCH 06/52] add Quic ConnectionTx task, HTTP3 handshake and HTTP3 session --- pingora-core/src/protocols/http/v3/mod.rs | 57 ++++++ pingora-core/src/protocols/http/v3/server.rs | 191 +++++++++++++++++- pingora-core/src/protocols/l4/quic/mod.rs | 173 ++++++++++++++-- pingora-core/src/protocols/l4/quic/sendto.rs | 1 + .../src/protocols/l4/quic/tls_handshake.rs | 40 +++- pingora-error/src/lib.rs | 2 + 6 files changed, 435 insertions(+), 29 deletions(-) diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index d2e3e318e..24acfba50 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -14,4 +14,61 @@ //! HTTP/3 implementation +use http::{HeaderMap, HeaderName, HeaderValue, Request, Uri, Version}; +use log::warn; +use quiche::h3::{Header, NameValue}; +use pingora_http::{RequestHeader, ResponseHeader}; + pub mod server; +pub mod nohash; + +pub fn event_to_request_headers(list: &Vec
) -> RequestHeader { + let (mut parts, _) = Request::new(()).into_parts(); + let mut uri = Uri::builder(); + let mut headers = HeaderMap::new(); + + for h in list { + match h.name() { + b":scheme" => uri = uri.scheme(h.value()), + b":authority" => uri = uri.authority(h.value()), + b":path" => uri = uri.path_and_query(h.value()), + b":method" => match h.value().try_into() { + Ok(v) => parts.method = v, + Err(_) => { + warn!("Failed to parse method from input: {:?}", h.value()) + } + }, + _ => { + match HeaderName::from_bytes(h.name()) { + Ok(k) => match HeaderValue::from_bytes(h.value()) { + Ok(v) => { + headers.append(k, v); + } + Err(_) => { + warn!("Failed to parse header value from input: {:?}", h.value()) + } + }, + Err(_) => { + warn!("Failed to parse header name input: {:?}", h.name()) + } + }; + } + } + } + + parts.version = Version::HTTP_3; + parts.uri = uri.build().unwrap(); // TODO: use result + parts.headers = headers; + parts.into() +} + +#[allow(unused)] // TODO: remove +fn response_headers_to_event(resp: &ResponseHeader) -> Vec
<Header> { + let mut qheaders: Vec<Header>
= Vec::with_capacity(resp.headers.len() + 1); + qheaders.push(Header::new(b":status", resp.status.as_str().as_bytes())); + + for (k, v) in &resp.headers { + qheaders.push(Header::new(k.as_str().as_bytes(), v.as_bytes())) + } + qheaders +} \ No newline at end of file diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index 9cff0d35a..d181f1356 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -18,29 +18,84 @@ use crate::protocols::{Digest, SocketAddr, Stream}; use bytes::Bytes; use http::uri::PathAndQuery; use http::HeaderMap; -use pingora_error::Result; +use pingora_error::{Error, ErrorType, OrErr, Result}; use std::future::Future; use std::pin::Pin; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; use std::task::{Context, Poll}; - +use log::{debug, info, trace}; +use parking_lot::Mutex; use crate::protocols::http::v1::client::http_req_header_to_wire; use pingora_http::{RequestHeader, ResponseHeader}; use crate::protocols::http::HttpTask; pub use quiche::h3::Config as H3Options; +use crate::protocols::l4::quic::Connection; +use quiche::{Connection as QuicheConnection}; +use quiche::h3::{Connection as QuicheH3Connection, Event, Header}; +use tokio::sync::{mpsc, Notify}; +use tokio::sync::mpsc::Receiver; +use crate::protocols::http::v3::event_to_request_headers; +use crate::protocols::http::v3::nohash::StreamIdHashMap; + +static H3_OPTIONS: OnceLock = OnceLock::new(); + +const H3_SESSION_EVENTS_CHANNEL_SIZE : usize = 256; /// Perform HTTP/3 connection handshake with an established (QUIC) connection. /// /// The optional `options` allow to adjust certain HTTP/3 parameters and settings. /// See [`H3Options`] for more details. 
-#[allow(unused)] // TODO: remove -pub async fn handshake(io: Stream, options: Option<&H3Options>) -> Result { - Ok(H3Connection { _l4stream: io }) +pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result { + let options = options.unwrap_or(H3_OPTIONS.get_or_init(|| H3Options::new().unwrap())); + + let Some(conn) = io.quic_connection_state() else { + return Err(Error::explain( + ErrorType::ConnectError, "HTTP3 handshake only possible on Quic connections")); + }; + + let (conn_id, qconn, hconn, + tx_notify, rx_notify) = match conn { + Connection::Incoming(_) => { + return Err(Error::explain( + ErrorType::InternalError, + "connection needs to be established, invalid state")) + } + Connection::Established(state) => { + let conn_id; + let hconn = { + let mut qconn = state.connection.lock(); + conn_id = qconn.trace_id().to_string(); + quiche::h3::Connection::with_transport(&mut qconn, &options) + .explain_err(ErrorType::ConnectError, |e| { + format!("failed to create HTTP3 connection with {e}") + })? + }; + (conn_id, state.connection.clone(), hconn, state.tx_notify.clone(), state.rx_notify.clone()) + } + }; + + Ok(H3Connection { + _l4stream: io, + id: conn_id.to_string(), + quic_connection: qconn, + h3_connection: Arc::new(Mutex::new(hconn)), + tx_notify, + rx_notify, + sessions: Default::default(), + }) } pub struct H3Connection { _l4stream: Stream, // ensure the stream will not be dropped until all sessions are + id: String, + quic_connection: Arc>, + h3_connection: Arc>, + + tx_notify: Arc, + rx_notify: Arc, + + sessions: StreamIdHashMap> } impl H3Connection { @@ -55,6 +110,17 @@ impl H3Connection { /// HTTP/3 server session #[allow(unused)] // TODO: remove pub struct HttpSession { + connection_id: String, + stream_id: u64, + quic_connection: Arc>, + h3_connection: Arc>, + + tx_notify: Arc, + event_rx: Receiver, + + request_event_headers: Vec
, + request_event_more_frames: bool, + request_header: Option, // Remember what has been written response_written: Option>, @@ -89,7 +155,118 @@ impl HttpSession { conn: &mut H3Connection, digest: Arc, ) -> Result> { - todo!(); + 'poll: loop { + let poll = { + let mut qconn = conn.quic_connection.lock(); + let mut hconn = conn.h3_connection.lock(); + // NOTE: poll() drives the entire Quic/HTTP3 connection + hconn.poll(&mut qconn) + }; + + match poll { + Ok((stream_id, ev)) => { + if let Some(channel) = conn.sessions.get(&stream_id) { + debug!( + "HTTP3 Connection {} with stream id {} forward event {:?} to handler.", + conn.id, stream_id, ev + ); + channel.send(ev); + } else { + debug!( + "HTTP3 Connection {} with stream id {} received event {:?}", + conn.id, stream_id, &ev + ); + match ev { + Event::Data + | Event::Finished + | Event::Reset(_) + | Event::PriorityUpdate => { + debug_assert!(false, "event type requires corresponding session") + } + Event::GoAway => { + info!("Received GoAway, dropping connection."); + return Ok(None) + }, + Event::Headers { list, more_frames: has_body } => { + trace!( + "HTTP3 Connection {} request headers: {:?}, more_frames: {:?}", + conn.id, + &list, + &has_body + ); + + let (event_tx, event_rx) = mpsc::channel(H3_SESSION_EVENTS_CHANNEL_SIZE); + let session = HttpSession { + connection_id: conn.id.clone(), + stream_id, + + quic_connection: conn.quic_connection.clone(), + h3_connection: conn.h3_connection.clone(), + + tx_notify: conn.tx_notify.clone(), + event_rx, + + request_header: Some(event_to_request_headers(&list)), + response_written: None, + + request_event_headers: list, + request_event_more_frames: has_body, + + body_sent: 0, + send_ended: false, + + digest + }; + + if let Some(_) = conn.sessions.insert(stream_id, event_tx) { + debug_assert!(false, "existing session is not allowed. 
{stream_id}") + }; + return Ok(Some(session)); + } + } + } + } + Err(quiche::h3::Error::Done) => { + debug!("H3Connection {} currently no events available.", conn.id); + // TODO: in case PriorityUpdate was triggered take_priority_update should be called here + let is_active; + let timeout; + { + let mut qconn = conn.quic_connection.lock(); + if qconn.is_closed() { + return Ok(None) + } + is_active = qconn.is_established() && !qconn.is_in_early_data(); + timeout = qconn.timeout(); + } + + if is_active { + debug!("Quic connection {:?} is still active. Timeout: {:?}", conn.id, timeout); + if let Some(timeout) = timeout { + // race for new data on connection or timeout + tokio::select! { + _timeout = tokio::time::sleep(timeout) => { + let mut qconn = conn.quic_connection.lock(); + qconn.on_timeout(); + } + _data = conn.rx_notify.notified() => {} + } + }; + + continue 'poll + } + + debug!("H3Connection {} waiting for data", conn.id); + continue 'poll; + } + Err(err) => { + info!("Received error, dropping connection. {:?}", err); + return Err(err).explain_err(ErrorType::H3Error, |e| { + format!("While accepting new downstream requests. 
Error: {e}") + }) + } + } + } } /// The request sent from the client diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index 88ec75b73..89a9bf252 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -5,10 +5,10 @@ use std::net::SocketAddr; use std::os::fd::{AsRawFd, RawFd}; use std::pin::Pin; use std::sync::Arc; -use std::task::{Context, Poll, Waker}; +use std::task::{Context, Poll}; use log::{debug, error, trace, warn}; use parking_lot::Mutex; -use quiche::{Config, ConnectionId, Header, RecvInfo, Type}; +use quiche::{Config, ConnectionId, Header, RecvInfo, Stats, Type}; use ring::hmac::Key; use ring::rand::SystemRandom; use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; @@ -16,8 +16,9 @@ use tokio::net::UdpSocket; use tokio::sync::mpsc::{channel, Receiver, Sender}; use tokio::sync::mpsc::error::TryRecvError; use tokio::sync::Notify; -use pingora_error::{BError, Error, ErrorType}; +use pingora_error::{BError, Error, ErrorType, Result}; use quiche::Connection as QuicheConnection; +use tokio::task::JoinHandle; use settings::Settings as QuicSettings; #[allow(unused)] // TODO: remove @@ -27,6 +28,7 @@ pub(crate) mod tls_handshake; mod settings; use crate::protocols::ConnectionState; +use crate::protocols::l4::quic::sendto::send_to; use crate::protocols::l4::stream::Stream as L4Stream; // UDP header 8 bytes, IPv4 Header 20 bytes @@ -79,9 +81,11 @@ pub struct IncomingState { pub struct EstablishedState { socket: Arc, - connection: Arc>, - tx_notify: Arc, - rx_waker: Arc>> + tx_handle: JoinHandle>, + + pub connection: Arc>, + pub tx_notify: Arc, + pub rx_notify: Arc, } pub enum ConnectionHandle { @@ -114,7 +118,7 @@ pub(crate) enum HandshakeResponse { #[derive(Clone)] pub struct EstablishedHandle { connection: Arc>, - rx_waker: Arc>>, + rx_notify: Arc, tx_notify: Arc, } @@ -230,12 +234,8 @@ impl Listener { // receive data into existing connection match 
Self::recv_connection(e.connection.as_ref(), &mut rx_buf[..size], recv_info) { Ok(_len) => { + e.rx_notify.notify_one(); e.tx_notify.notify_one(); - - let mut rx_waker = e.rx_waker.lock(); - if let Some(waker) = rx_waker.take() { - waker.wake_by_ref(); - } } Err(e) => { // TODO: take action on errors, e.g close connection, send & remove @@ -346,6 +346,11 @@ impl ConnectionHandle { impl Connection { fn establish(&mut self, state: EstablishedState) { + if cfg!(test) { + let conn = state.connection.lock(); + debug_assert!(conn.is_established() || conn.is_in_early_data(), + "connection must be established or ready for data") + } match self { Connection::Incoming(_) => { let _ = mem::replace(self, Connection::Established(state)); @@ -355,6 +360,150 @@ impl Connection { } } +impl Drop for Connection { + fn drop(&mut self) { + match self { + Connection::Incoming(_) => {} + Connection::Established(s) => { + if !s.tx_handle.is_finished() { + s.tx_handle.abort(); + trace!("stopped connection tx task"); + } + } + } + } +} + +struct ConnectionTx { + socket: Arc, + + connection: Arc>, + connection_id: String, + + tx_notify: Arc, + tx_stats: TxBurst, + + gso_enabled: bool, + pacing_enabled: bool, +} + +impl ConnectionTx { + async fn start_tx(mut self) -> Result<()> { + let id = self.connection_id; + let mut out = [0u8;MAX_IPV6_BUF_SIZE]; + + let mut finished_sending = false; + debug!("connection tx write"); + 'write: loop { + // update stats from connection + let max_send_burst = { + let conn = self.connection.lock(); + self.tx_stats.max_send_burst(conn.stats(), conn.send_quantum()) + }; + let mut total_write = 0; + let mut dst_info = None; + + // fill tx buffer with connection data + trace!("total_write={}, max_send_burst={}", total_write, max_send_burst); + 'fill: while total_write < max_send_burst { + let send = { + let mut conn = self.connection.lock(); + conn.send(&mut out[total_write..max_send_burst]) + }; + + let (size, send_info) = match send { + Ok((size, info)) => { + 
debug!("connection sent to={:?}, length={}", info.to, size); + (size, info) + }, + Err(e) => { + if e == quiche::Error::Done { + trace!("connection send finished"); + finished_sending = true; + break 'fill; + } + error!("connection send error: {:?}", e); + /* TODO: close connection + let mut conn = self.connection.lock(); + conn.close(false, 0x1, b"fail").ok(); + */ + break 'write Err(Error::explain( + ErrorType::WriteError, + format!("Connection {:?} send data to network failed with {:?}", id, e))); + } + }; + + total_write += size; + // Use the first packet time to send, not the last. + let _ = dst_info.get_or_insert(send_info); + } + + if total_write == 0 || dst_info.is_none() { + debug!("nothing to send, waiting for notification..."); + self.tx_notify.notified().await; + continue; + } + let dst_info = dst_info.unwrap(); + + // send to network + if let Err(e) = send_to( + &self.socket, + &out[..total_write], + &dst_info, + self.tx_stats.max_datagram_size, + self.pacing_enabled, + self.gso_enabled, + ).await { + if e.kind() == io::ErrorKind::WouldBlock { + error!("network socket would block"); + continue + } + break 'write Err(Error::explain( + ErrorType::WriteError, + format!("network send failed with {:?}", e))); + } + trace!("network sent to={} bytes={}", dst_info.to, total_write); + + if finished_sending { + debug!("sending finished, waiting for notification..."); + self.tx_notify.notified().await + } + } + } +} + +pub struct TxBurst { + loss_rate: f64, + max_send_burst: usize, + max_datagram_size: usize +} + +impl TxBurst { + fn new(max_send_udp_payload_size: usize) -> Self { + Self { + loss_rate: 0.0, + max_send_burst: MAX_IPV6_BUF_SIZE, + max_datagram_size: max_send_udp_payload_size, + } + } + + fn max_send_burst(&mut self, stats: Stats, send_quantum: usize) -> usize { + // Reduce max_send_burst by 25% if loss is increasing more than 0.1%. 
+ let loss_rate = stats.lost as f64 / stats.sent as f64; + + if loss_rate > self.loss_rate + 0.001 { + self.max_send_burst = self.max_send_burst / 4 * 3; + // Minimum bound of 10xMSS. + self.max_send_burst = + self.max_send_burst.max(self.max_datagram_size * 10); + self.loss_rate = loss_rate; + } + + send_quantum.min(self.max_send_burst) / + self.max_datagram_size * self.max_datagram_size + } +} + impl AsRawFd for Connection { fn as_raw_fd(&self) -> RawFd { match self { diff --git a/pingora-core/src/protocols/l4/quic/sendto.rs b/pingora-core/src/protocols/l4/quic/sendto.rs index 15936721d..a8052e6fa 100644 --- a/pingora-core/src/protocols/l4/quic/sendto.rs +++ b/pingora-core/src/protocols/l4/quic/sendto.rs @@ -65,6 +65,7 @@ fn send_to_gso_pacing( let cmsg_gso = ControlMessage::UdpGsoSegments(&segment_size); // Pacing option. + // TODO: fix & enable //let send_time = std_time_to_u64(&send_info.at); //let cmsg_txtime = ControlMessage::TxTime(&send_time); diff --git a/pingora-core/src/protocols/l4/quic/tls_handshake.rs b/pingora-core/src/protocols/l4/quic/tls_handshake.rs index cf63e7d38..989d32de9 100644 --- a/pingora-core/src/protocols/l4/quic/tls_handshake.rs +++ b/pingora-core/src/protocols/l4/quic/tls_handshake.rs @@ -6,9 +6,9 @@ use tokio::net::UdpSocket; use tokio::sync::Notify; use pingora_error::{Error, ErrorType, OrErr}; use crate::protocols::ConnectionState; -use crate::protocols::l4::quic::{Connection, EstablishedHandle, EstablishedState, HandshakeResponse, IncomingState, MAX_IPV6_UDP_PACKET_SIZE}; +use crate::protocols::l4::quic::{Connection, ConnectionTx, EstablishedHandle, EstablishedState, HandshakeResponse, IncomingState, TxBurst, MAX_IPV6_QUIC_DATAGRAM_SIZE, MAX_IPV6_UDP_PACKET_SIZE}; use crate::protocols::l4::quic::id_token::{mint_token, validate_token}; -use crate::protocols::l4::quic::sendto::set_txtime_sockopt; +use crate::protocols::l4::quic::sendto::{detect_gso, set_txtime_sockopt}; use crate::protocols::l4::stream::Stream as L4Stream; 
pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result { @@ -201,6 +201,7 @@ async fn handshake_inner(state: &mut IncomingState) -> pingora_error::Result pingora_error::Result { debug!("successfully set SO_TXTIME socket option"); true @@ -247,16 +250,33 @@ async fn handshake_inner(state: &mut IncomingState) -> pingora_error::Result "InvalidHTTPHeader", ErrorType::H1Error => "H1Error", ErrorType::H2Error => "H2Error", + ErrorType::H3Error => "H3Error", ErrorType::InvalidH2 => "InvalidH2", ErrorType::H2Downgrade => "H2Downgrade", ErrorType::ReadError => "ReadError", From dfcbebdcfd5070f7dbda874b54264389b58a2e5b Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Sun, 5 Jan 2025 12:01:52 +0100 Subject: [PATCH 07/52] successful curl HTTP3 requests/responses --- pingora-core/src/protocols/http/v3/mod.rs | 1 - pingora-core/src/protocols/http/v3/server.rs | 292 ++++++++++++++++-- pingora-core/src/protocols/l4/quic/mod.rs | 33 +- .../src/protocols/l4/quic/tls_handshake.rs | 11 +- pingora-core/tests/utils/mod.rs | 28 +- 5 files changed, 315 insertions(+), 50 deletions(-) diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index 24acfba50..237f1d248 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -62,7 +62,6 @@ pub fn event_to_request_headers(list: &Vec
<Header>) -> RequestHeader { parts.into() } -#[allow(unused)] // TODO: remove fn response_headers_to_event(resp: &ResponseHeader) -> Vec
<Header> { let mut qheaders: Vec<Header>
= Vec::with_capacity(resp.headers.len() + 1); qheaders.push(Header::new(b":status", resp.status.as_str().as_bytes())); diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index d181f1356..8778ea830 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -14,28 +14,31 @@ //! HTTP/3 server session +use std::fmt::Debug; use crate::protocols::{Digest, SocketAddr, Stream}; -use bytes::Bytes; +use bytes::{BufMut, Bytes, BytesMut}; use http::uri::PathAndQuery; -use http::HeaderMap; +use http::{header, HeaderMap, HeaderName}; use pingora_error::{Error, ErrorType, OrErr, Result}; use std::future::Future; use std::pin::Pin; use std::sync::{Arc, OnceLock}; use std::task::{Context, Poll}; -use log::{debug, info, trace}; +use log::{debug, error, info, trace, warn}; use parking_lot::Mutex; use crate::protocols::http::v1::client::http_req_header_to_wire; use pingora_http::{RequestHeader, ResponseHeader}; +use crate::protocols::http::date::get_cached_date; use crate::protocols::http::HttpTask; pub use quiche::h3::Config as H3Options; -use crate::protocols::l4::quic::Connection; -use quiche::{Connection as QuicheConnection}; -use quiche::h3::{Connection as QuicheH3Connection, Event, Header}; +use crate::protocols::l4::quic::{Connection, MAX_IPV6_QUIC_DATAGRAM_SIZE}; +use quiche::{h3, Connection as QuicheConnection}; +use quiche::h3::{Connection as QuicheH3Connection, Event, NameValue}; use tokio::sync::{mpsc, Notify}; use tokio::sync::mpsc::Receiver; -use crate::protocols::http::v3::event_to_request_headers; +use crate::protocols::http::body_buffer::FixedBuffer; +use crate::protocols::http::v3::{event_to_request_headers, response_headers_to_event}; use crate::protocols::http::v3::nohash::StreamIdHashMap; static H3_OPTIONS: OnceLock = OnceLock::new(); @@ -108,22 +111,29 @@ impl H3Connection { } /// HTTP/3 server session -#[allow(unused)] // TODO: remove pub struct HttpSession 
{ connection_id: String, stream_id: u64, quic_connection: Arc>, h3_connection: Arc>, + // trigger Quic send, continue ConnectionTx write loop tx_notify: Arc, - event_rx: Receiver, + // receive notification on Quic recv, used to check stream capacity + // as it only increases after MaxData or MaxStreamData frame was received + rx_notify: Arc, - request_event_headers: Vec
, - request_event_more_frames: bool, + // HTTP3 event channel for this stream_id + event_rx: Receiver, request_header: Option, + read_ended: bool, + body_read: usize, + // buffered request body for retry logic + body_retry_buffer: Option, + // Remember what has been written - response_written: Option>, + response_header_written: Option>, // How many (application, not wire) response body bytes have been sent so far. body_sent: usize, @@ -167,13 +177,16 @@ impl HttpSession { Ok((stream_id, ev)) => { if let Some(channel) = conn.sessions.get(&stream_id) { debug!( - "HTTP3 Connection {} with stream id {} forward event {:?} to handler.", + "HTTP3 conn_id={} stream_id={} forward event={:?}", conn.id, stream_id, ev ); - channel.send(ev); + channel.send(ev).await + .explain_err( + ErrorType::WriteError, + |e| format!("failed to send on event channel with {}", e))?; } else { debug!( - "HTTP3 Connection {} with stream id {} received event {:?}", + "HTTP3 conn_id={} stream_id={} received event {:?}", conn.id, stream_id, &ev ); match ev { @@ -187,12 +200,12 @@ impl HttpSession { info!("Received GoAway, dropping connection."); return Ok(None) }, - Event::Headers { list, more_frames: has_body } => { + Event::Headers { list, more_frames: stream_continues } => { trace!( - "HTTP3 Connection {} request headers: {:?}, more_frames: {:?}", + "HTTP3 conn_id={} request headers: {:?}, more_frames: {:?}", conn.id, &list, - &has_body + &stream_continues ); let (event_tx, event_rx) = mpsc::channel(H3_SESSION_EVENTS_CHANNEL_SIZE); @@ -204,14 +217,15 @@ impl HttpSession { h3_connection: conn.h3_connection.clone(), tx_notify: conn.tx_notify.clone(), + rx_notify: conn.rx_notify.clone(), event_rx, + read_ended: !stream_continues, request_header: Some(event_to_request_headers(&list)), - response_written: None, - - request_event_headers: list, - request_event_more_frames: has_body, + body_read: 0, + body_retry_buffer: None, + response_header_written: None, body_sent: 0, send_ended: false, @@ -227,7 
+241,7 @@ impl HttpSession { } } Err(quiche::h3::Error::Done) => { - debug!("H3Connection {} currently no events available.", conn.id); + debug!("HTTP3 conn_id={} no events available", conn.id); // TODO: in case PriorityUpdate was triggered take_priority_update should be called here let is_active; let timeout; @@ -256,7 +270,7 @@ impl HttpSession { continue 'poll } - debug!("H3Connection {} waiting for data", conn.id); + debug!("HTTP3 conn_id={} waiting for data", conn.id); continue 'poll; } Err(err) => { @@ -287,7 +301,45 @@ impl HttpSession { /// Read request body bytes. `None` when there is no more body to read. pub async fn read_body_bytes(&mut self) -> Result> { - todo!(); + self.data_finished_event().await?; + if self.read_ended { + return Ok(None) + } + + let mut buf = [0u8; MAX_IPV6_QUIC_DATAGRAM_SIZE]; + let size = match self.recv_body(&mut buf) { + Ok(size) => size, + Err(h3::Error::Done) => { + error!("recv_body: Done"); + return Ok(Some(BytesMut::with_capacity(0).into())) + }, + Err(e) => return Err(Error::explain( + ErrorType::ReadError, format!("reading body failed with {}", e))) + }; + + let mut data = BytesMut::with_capacity(size); + data.put_slice(&buf[..size]); + let data: Bytes = data.into(); + + self.body_read += size; + if let Some(buffer) = &mut self.body_retry_buffer { + buffer.write_to_buffer(&data); + } + + trace!("ready body len={:?}", data.len()); + Ok(Some(data)) + } + + + fn recv_body(&self, out: &mut [u8]) -> h3::Result<(usize)> { + let mut qconn = self.quic_connection.lock(); + let mut hconn = self.h3_connection.lock(); + debug!( + "HTTP3 conn_id={} stream_id={} receiving body", + qconn.trace_id(), + self.stream_id + ); + hconn.recv_body(&mut qconn, self.stream_id, out) } // the write_* don't have timeouts because the actual writing happens on the connection @@ -302,12 +354,144 @@ impl HttpSession { mut header: Box, end: bool, ) -> Result<()> { - todo!(); + if self.send_ended { + // TODO: error or warn? 
+ warn!("Http session already ended."); + return Ok(()); + } else if self.response_header_written.as_ref().is_some() { + warn!("Response header is already sent, cannot send again"); + return Ok(()); + } + + /* TODO: check if should that be as well handled like that? + if header.status.is_informational() { + // ignore informational response 1xx header because send_response() can only be called once + // https://github.com/hyperium/h2/issues/167 + debug!("ignoring informational headers"); + return Ok(()); + } */ + + /* update headers */ + header.insert_header(header::DATE, get_cached_date())?; + + // TODO: check if this is correct for H3 + // remove other h1 hop headers that cannot be present in H3 + // https://httpwg.org/specs/rfc7540.html#n-connection-specific-header-fields + header.remove_header(&header::TRANSFER_ENCODING); + header.remove_header(&header::CONNECTION); + header.remove_header(&header::UPGRADE); + header.remove_header(&HeaderName::from_static("keep-alive")); + header.remove_header(&HeaderName::from_static("proxy-connection")); + + let headers = response_headers_to_event(&header); + let sent = self.send_response(headers.as_slice(), end).await; + + match sent { + Ok(()) => { + self.tx_notify.notify_one(); + } + Err(h3::Error::Done) => {}, + Err(e) => return Err(e) + .explain_err( + ErrorType::WriteError, + |e| format!("failed to write response to http3 connection with {}", e)), + } + + self.response_header_written = Some(header); + self.send_ended = self.send_ended || end; + Ok(()) + } + + async fn send_response( + &self, + headers: &[T], + fin: bool, + ) -> h3::Result<()> { + let mut qconn = self.quic_connection.lock(); + let mut hconn = self.h3_connection.lock(); + + // TODO: use qconn.stream_capacity(stream_id) or qconn.stream_writeable(stream_id) + // eventually retry in case send_response returns a StreamBlocked error + debug!( + "HTTP3 conn_id={} stream_id={} sending response headers={:?}, finished={}", + qconn.trace_id(), + self.stream_id, + 
headers, + fin + ); + hconn.send_response(&mut qconn, self.stream_id, headers, fin) } /// Write response body to the client. See [Self::write_response_header] for how to use `end`. pub async fn write_body(&mut self, data: Bytes, end: bool) -> Result<()> { - todo!(); + if self.send_ended { + // NOTE: in h1, we also track to see if content-length matches the data + // We have not tracked that in h3 + warn!("Cannot write body after stream ended. Dropping the extra data."); + return Ok(()); + } else if self.response_header_written.is_none() { + return Err(Error::explain( + ErrorType::H3Error, + "Trying to send the body before header being sent.", + )); + }; + + let mut sent_len = 0; + while sent_len < data.len() { + let capacity = self.stream_capacity().await + .explain_err( + ErrorType::WriteError, + |e| format!("Failed to acquire capacity on stream id {} with {}", self.stream_id, e))?; + + let send; + if capacity > data.len() { + send = &data[sent_len..data.len()]; + } else { + send = &data[sent_len..capacity]; + } + + match self.send_body(send, end).await { + Ok(sent_size) => { + sent_len += sent_size; + self.tx_notify.notify_one(); + }, + Err(_e) => { + return Err(Error::explain( + ErrorType::WriteError, + format!("Writing h3 response body to downstream failed. 
{}", _e), + )) + } + } + } + + self.body_sent += sent_len; + self.send_ended = self.send_ended || end; + Ok(()) + } + + async fn send_body(&self, body: &[u8], fin: bool) -> h3::Result { + let mut qconn = self.quic_connection.lock(); + let mut hconn = self.h3_connection.lock(); + + debug!("HTTP3 conn_id={} stream_id={} sending response body with length={:?}, finished={}", + self.connection_id, self.stream_id, body.len(), fin); + + hconn.send_body(&mut qconn, self.stream_id, body, fin) + } + + async fn stream_capacity(&self) -> quiche::Result { + let capacity; + { + let qconn = self.quic_connection.lock(); + capacity = qconn.stream_capacity(self.stream_id)?; + } + + if capacity > 0 { + Ok(capacity) + } else { + self.rx_notify.notified().await; + Box::pin(self.stream_capacity()).await + } } /// Write response trailers to the client, this also closes the stream. @@ -334,7 +518,55 @@ impl HttpSession { pub async fn finish(&mut self) -> Result<()> { // TODO: check/validate with documentation on protocols::http::server::HttpSession // TODO: check/validate trailer sending - todo!(); + if self.send_ended { + // already ended the stream + return Ok(()); + } + + // use an empty data frame to signal the end + self.send_body(&[], true) + .await + .explain_err( + ErrorType::WriteError, + |e| format! {"Writing h3 response body to downstream failed. {e}"}, + )?; + + self.send_ended = true; + // else: the response header is not sent, do nothing now. + // When send_response_body is dropped, an RST_STREAM will be sent + + Ok(()) + } + + async fn data_finished_event(&mut self) -> Result<()> { + loop { + match self.event_rx.recv().await { + Some(ev) => { + trace!("event {:?}", ev); + match ev { + Event::Finished => { + self.read_ended = true; + return Ok(()) + } + Event::Headers { .. 
} => { + debug_assert!(false, "Headers or Finished event when Data requested"); + }, + Event::Data => { + return Ok(()) + } + // TODO: handle events correctly + Event::Reset(_) | + Event::PriorityUpdate | + Event::GoAway => { + continue + }, + } + } + None => return Err(Error::explain( + ErrorType::ReadError, + "HTTP3 Session event channel disconnected.")), + } + } } pub async fn response_duplex_vec(&mut self, tasks: Vec) -> Result { @@ -397,7 +629,7 @@ impl HttpSession { /// Return the written response header. `None` if it is not written yet. pub fn response_written(&self) -> Option<&ResponseHeader> { - self.response_written.as_deref() + self.response_header_written.as_deref() } /// Give up the stream abruptly. @@ -496,4 +728,4 @@ impl<'a> Future for Idle<'a> { fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { todo!(); } -} +} \ No newline at end of file diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index 89a9bf252..7080b61c1 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -16,7 +16,7 @@ use tokio::net::UdpSocket; use tokio::sync::mpsc::{channel, Receiver, Sender}; use tokio::sync::mpsc::error::TryRecvError; use tokio::sync::Notify; -use pingora_error::{BError, Error, ErrorType, Result}; +use pingora_error::{BError, Error, ErrorType, OrErr, Result}; use quiche::Connection as QuicheConnection; use tokio::task::JoinHandle; use settings::Settings as QuicSettings; @@ -162,7 +162,7 @@ impl Listener { pub(crate) async fn accept(&self) -> io::Result<(L4Stream, SocketAddr)> { let mut rx_buf = [0u8; MAX_IPV6_BUF_SIZE]; - trace!("endpoint rx loop"); + debug!("endpoint rx loop"); 'read: loop { // receive from network and parse Quic header let (size, from) = self.socket.recv_from(&mut rx_buf).await?; @@ -234,7 +234,7 @@ impl Listener { // receive data into existing connection match Self::recv_connection(e.connection.as_ref(), &mut rx_buf[..size], 
recv_info) { Ok(_len) => { - e.rx_notify.notify_one(); + e.rx_notify.notify_waiters(); e.tx_notify.notify_one(); } Err(e) => { @@ -345,17 +345,34 @@ impl ConnectionHandle { } impl Connection { - fn establish(&mut self, state: EstablishedState) { + async fn establish(&mut self, state: EstablishedState) -> Result<()> { if cfg!(test) { let conn = state.connection.lock(); debug_assert!(conn.is_established() || conn.is_in_early_data(), "connection must be established or ready for data") } match self { - Connection::Incoming(_) => { + Connection::Incoming(s) => { + /* + // consume packets that potentially arrived during state transition + while !s.udp_rx.is_empty() { + error!("consuming {} packets which arrived during state transition", s.udp_rx.len()); + let mut dgram= s.udp_rx.recv().await; + if let Some(mut dgram) = dgram { + let mut qconn = state.connection.lock(); + qconn.recv(&mut dgram.pkt.as_mut_slice(), dgram.recv_info).explain_err( + ErrorType::ReadError, + |e| format!("receiving dgram on quic connection failed with {:?}", e))?; + } + } + s.udp_rx.close(); + */ let _ = mem::replace(self, Connection::Established(state)); + Ok(()) } - Connection::Established(_) => {} + Connection::Established(_) => Err(Error::explain( + ErrorType::InternalError, + "establishing connection only possible on incoming connection")) } } } @@ -548,7 +565,9 @@ impl AsyncWrite for Connection { } fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - todo!() + // FIXME: this is called on l4::Stream::drop() + // correlates to the connection, check if stopping tx loop for connection & final flush is feasible + Poll::Ready(Ok(())) } fn poll_shutdown( diff --git a/pingora-core/src/protocols/l4/quic/tls_handshake.rs b/pingora-core/src/protocols/l4/quic/tls_handshake.rs index 989d32de9..71fab1e50 100644 --- a/pingora-core/src/protocols/l4/quic/tls_handshake.rs +++ b/pingora-core/src/protocols/l4/quic/tls_handshake.rs @@ -36,7 +36,8 @@ pub(crate) async fn handshake(mut stream: 
L4Stream) -> pingora_error::Result pingora_error::Result pingora_error::Result pingora_error::Result pingora_error::Result Response> { // read timeout of 2s let read_timeout = 2000; + let body_future = async { + let mut body = BytesMut::with_capacity(MAX_IPV6_BUF_SIZE); + while let Ok(b) = http_stream.read_request_body().await { + match b { + None => break, // finished reading request + Some(b) => body.put(b) + } + } + if body.is_empty() { + body.put("no body!".as_bytes()); + } + body.freeze() + }; + let body = match timeout( Duration::from_millis(read_timeout), - http_stream.read_request_body(), + body_future, ) .await { - Ok(res) => match res.unwrap() { - Some(bytes) => bytes, - None => Bytes::from("no body!"), - }, + Ok(res) => res, Err(_) => { panic!("Timed out after {:?}ms", read_timeout); } @@ -69,7 +81,9 @@ pub struct MyServer { } fn entry_point(opt: Option) { - env_logger::init(); + env_logger::builder() + .format_timestamp(Some(env_logger::TimestampPrecision::Nanos)) + .init(); let cert_path = format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR")); let key_path = format!("{}/tests/keys/key.pem", env!("CARGO_MANIFEST_DIR")); From 5cc981a653bf71c2b08d7b37063a977830ac4446 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Sun, 5 Jan 2025 13:57:39 +0100 Subject: [PATCH 08/52] detect socket settings during Quic listener creation enhance Quic handshake: - loop send & receive - error handling, logging read multiple body chunks in EchoApp --- pingora-core/src/protocols/l4/quic/mod.rs | 78 ++++++----- pingora-core/src/protocols/l4/quic/sendto.rs | 8 +- .../src/protocols/l4/quic/tls_handshake.rs | 126 ++++++++---------- pingora-core/tests/utils/mod.rs | 4 +- 4 files changed, 104 insertions(+), 112 deletions(-) diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index 7080b61c1..99e546a9c 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -16,19 +16,18 @@ 
use tokio::net::UdpSocket; use tokio::sync::mpsc::{channel, Receiver, Sender}; use tokio::sync::mpsc::error::TryRecvError; use tokio::sync::Notify; -use pingora_error::{BError, Error, ErrorType, OrErr, Result}; +use pingora_error::{BError, Error, ErrorType, Result}; use quiche::Connection as QuicheConnection; use tokio::task::JoinHandle; use settings::Settings as QuicSettings; -#[allow(unused)] // TODO: remove mod sendto; mod id_token; pub(crate) mod tls_handshake; mod settings; use crate::protocols::ConnectionState; -use crate::protocols::l4::quic::sendto::send_to; +use crate::protocols::l4::quic::sendto::{detect_gso, send_to, set_txtime_sockopt}; use crate::protocols::l4::stream::Stream as L4Stream; // UDP header 8 bytes, IPv4 Header 20 bytes @@ -48,7 +47,7 @@ const HANDSHAKE_PACKET_BUFFER_SIZE: usize = 64; pub struct Listener { socket: Arc, - socket_addr: SocketAddr, + socket_details: SocketDetails, config: Arc>, crypto: Crypto, @@ -66,10 +65,11 @@ pub enum Connection { } pub struct IncomingState { - id: ConnectionId<'static>, + connection_id: ConnectionId<'static>, config: Arc>, socket: Arc, + socket_details: SocketDetails, udp_rx: Receiver, response_tx: Sender, @@ -79,10 +79,18 @@ pub struct IncomingState { reject: bool } +#[derive(Clone)] +struct SocketDetails { + addr: SocketAddr, + gso_enabled: bool, + pacing_enabled: bool, +} + pub struct EstablishedState { socket: Arc, tx_handle: JoinHandle>, + connection_id: String, pub connection: Arc>, pub tx_notify: Arc, pub rx_notify: Arc, @@ -131,7 +139,7 @@ pub struct UdpRecv { impl TryFrom for Listener { type Error = BError; - fn try_from(io: UdpSocket) -> pingora_error::Result { + fn try_from(io: UdpSocket) -> Result { let addr = io.local_addr() .map_err(|e| Error::explain( ErrorType::SocketError, @@ -144,9 +152,25 @@ impl TryFrom for Listener { let settings = QuicSettings::try_default()?; + let gso_enabled = detect_gso(&io, MAX_IPV6_QUIC_DATAGRAM_SIZE); + let pacing_enabled = match set_txtime_sockopt(&io) { + 
Ok(_) => { + debug!("successfully set SO_TXTIME socket option"); + true + }, + Err(e) => { + debug!("setsockopt failed {:?}", e); + false + }, + }; + Ok(Listener { socket: Arc::new(io), - socket_addr: addr, + socket_details: SocketDetails { + addr, + gso_enabled, + pacing_enabled, + }, config: settings.get_config(), crypto: Crypto { @@ -182,7 +206,7 @@ impl Listener { // connection needs to be able to update source_ids() or destination_ids() let recv_info = RecvInfo { - to: self.socket_addr, + to: self.socket_details.addr, from, }; @@ -271,10 +295,11 @@ impl Listener { trace!("new incoming connection {:?}", conn_id); let connection = Connection::Incoming(IncomingState { - id: conn_id.clone(), + connection_id: conn_id.clone(), config: self.config.clone(), socket: self.socket.clone(), + socket_details: self.socket_details.clone(), udp_rx, response_tx, @@ -345,7 +370,7 @@ impl ConnectionHandle { } impl Connection { - async fn establish(&mut self, state: EstablishedState) -> Result<()> { + fn establish(&mut self, state: EstablishedState) -> Result<()> { if cfg!(test) { let conn = state.connection.lock(); debug_assert!(conn.is_established() || conn.is_in_early_data(), @@ -353,20 +378,9 @@ impl Connection { } match self { Connection::Incoming(s) => { - /* - // consume packets that potentially arrived during state transition - while !s.udp_rx.is_empty() { - error!("consuming {} packets which arrived during state transition", s.udp_rx.len()); - let mut dgram= s.udp_rx.recv().await; - if let Some(mut dgram) = dgram { - let mut qconn = state.connection.lock(); - qconn.recv(&mut dgram.pkt.as_mut_slice(), dgram.recv_info).explain_err( - ErrorType::ReadError, - |e| format!("receiving dgram on quic connection failed with {:?}", e))?; - } - } - s.udp_rx.close(); - */ + debug_assert!(s.udp_rx.is_empty(), + "udp rx channel must be empty when establishing the connection"); + debug!("connection {:?} established", state.connection_id); let _ = mem::replace(self, 
Connection::Established(state)); Ok(()) } @@ -393,15 +407,13 @@ impl Drop for Connection { struct ConnectionTx { socket: Arc, + socket_details: SocketDetails, connection: Arc>, connection_id: String, tx_notify: Arc, tx_stats: TxBurst, - - gso_enabled: bool, - pacing_enabled: bool, } impl ConnectionTx { @@ -468,8 +480,8 @@ impl ConnectionTx { &out[..total_write], &dst_info, self.tx_stats.max_datagram_size, - self.pacing_enabled, - self.gso_enabled, + self.socket_details.pacing_enabled, + self.socket_details.gso_enabled, ).await { if e.kind() == io::ErrorKind::WouldBlock { error!("network socket would block"); @@ -560,11 +572,11 @@ impl AsyncWrite for Connection { self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &[u8], - ) -> Poll> { + ) -> Poll> { todo!() } - fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { // FIXME: this is called on l4::Stream::drop() // correlates to the connection, check if stopping tx loop for connection & final flush is feasible Poll::Ready(Ok(())) @@ -573,7 +585,7 @@ impl AsyncWrite for Connection { fn poll_shutdown( self: Pin<&mut Self>, cx: &mut Context<'_>, - ) -> Poll> { + ) -> Poll> { todo!() } } @@ -584,7 +596,7 @@ impl AsyncRead for Connection { self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>, - ) -> Poll> { + ) -> Poll> { todo!() } } diff --git a/pingora-core/src/protocols/l4/quic/sendto.rs b/pingora-core/src/protocols/l4/quic/sendto.rs index a8052e6fa..b35d8e611 100644 --- a/pingora-core/src/protocols/l4/quic/sendto.rs +++ b/pingora-core/src/protocols/l4/quic/sendto.rs @@ -65,15 +65,13 @@ fn send_to_gso_pacing( let cmsg_gso = ControlMessage::UdpGsoSegments(&segment_size); // Pacing option. 
- // TODO: fix & enable - //let send_time = std_time_to_u64(&send_info.at); - //let cmsg_txtime = ControlMessage::TxTime(&send_time); + let send_time = std_time_to_u64(&send_info.at); + let cmsg_txtime = ControlMessage::TxTime(&send_time); match sendmsg( sockfd, &iov, - // &[cmsg_gso, cmsg_txtime], - &[cmsg_gso], + &[cmsg_gso, cmsg_txtime], MsgFlags::empty(), Some(&dst), ) { diff --git a/pingora-core/src/protocols/l4/quic/tls_handshake.rs b/pingora-core/src/protocols/l4/quic/tls_handshake.rs index 71fab1e50..da2539ab0 100644 --- a/pingora-core/src/protocols/l4/quic/tls_handshake.rs +++ b/pingora-core/src/protocols/l4/quic/tls_handshake.rs @@ -6,9 +6,8 @@ use tokio::net::UdpSocket; use tokio::sync::Notify; use pingora_error::{Error, ErrorType, OrErr}; use crate::protocols::ConnectionState; -use crate::protocols::l4::quic::{Connection, ConnectionTx, EstablishedHandle, EstablishedState, HandshakeResponse, IncomingState, TxBurst, MAX_IPV6_QUIC_DATAGRAM_SIZE, MAX_IPV6_UDP_PACKET_SIZE}; +use crate::protocols::l4::quic::{Connection, ConnectionTx, EstablishedHandle, EstablishedState, HandshakeResponse, IncomingState, TxBurst, MAX_IPV6_UDP_PACKET_SIZE}; use crate::protocols::l4::quic::id_token::{mint_token, validate_token}; -use crate::protocols::l4::quic::sendto::{detect_gso, set_txtime_sockopt}; use crate::protocols::l4::stream::Stream as L4Stream; pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result { @@ -25,7 +24,7 @@ pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result pingora_error::Result pingora_error::Result pingora_error::Result> { let IncomingState { - id, + connection_id: conn_id, config, socket, + socket_details, udp_rx, dgram, @@ -61,12 +60,12 @@ async fn handshake_inner(state: &mut IncomingState) -> pingora_error::Result pingora_error::Result pingora_error::Result pingora_error::Result pingora_error::Result pingora_error::Result (size, info), + Err(quiche::Error::Done) => break 'tx, + Err(e) => return 
Err(e).explain_err( + ErrorType::WriteError, |_| "creating handshake packet failed"), + }; + + trace!("sending handshake packet"); + send_dgram(&socket, &out[..size], info.to).await + .explain_err(ErrorType::WriteError, |_| "sending handshake packet failed")?; + } + trace!("waiting for handshake response"); - if let Some(mut dgram) = udp_rx.recv().await { - trace!("received handshake response"); - let buf = dgram.pkt.as_mut_slice(); - conn.recv(buf, dgram.recv_info) - .map_err(|e| Error::explain( + 'rx: loop { + if let Some(mut dgram) = udp_rx.recv().await { + trace!("received handshake response"); + conn.recv(dgram.pkt.as_mut_slice(), dgram.recv_info) + .explain_err( + ErrorType::HandshakeError, |_| "receiving handshake response failed")?; + } else { + return Err(Error::explain( ErrorType::HandshakeError, - format!("receiving handshake response failed with {:?}", e)))?; - } else { - return Err(Error::explain( - ErrorType::HandshakeError, - "finishing handshake failed, did not receive a response")); + "finishing handshake failed, did not receive a response")); + } + if udp_rx.is_empty() { + break 'rx; + } } trace!("connection established={}, early_data={}, closed={}, draining={}, readable={}, timed_out={}, resumed={}", @@ -235,21 +227,11 @@ async fn handshake_inner(state: &mut IncomingState) -> pingora_error::Result { - debug!("successfully set SO_TXTIME socket option"); - true - }, - Err(e) => { - debug!("setsockopt failed {:?}", e); - false - }, - }; let tx_notify = Arc::new(Notify::new()); let rx_notify = Arc::new(Notify::new()); @@ -258,18 +240,18 @@ async fn handshake_inner(state: &mut IncomingState) -> pingora_error::Result Date: Mon, 6 Jan 2025 08:31:35 +0100 Subject: [PATCH 09/52] H3 session housekeeping, stream capacity enhancements --- pingora-core/src/protocols/http/v3/server.rs | 156 +++++++++++-------- 1 file changed, 93 insertions(+), 63 deletions(-) diff --git a/pingora-core/src/protocols/http/v3/server.rs 
b/pingora-core/src/protocols/http/v3/server.rs index 8778ea830..ffec8fa2e 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -14,6 +14,7 @@ //! HTTP/3 server session +use std::cmp; use std::fmt::Debug; use crate::protocols::{Digest, SocketAddr, Stream}; use bytes::{BufMut, Bytes, BytesMut}; @@ -36,7 +37,8 @@ use crate::protocols::l4::quic::{Connection, MAX_IPV6_QUIC_DATAGRAM_SIZE}; use quiche::{h3, Connection as QuicheConnection}; use quiche::h3::{Connection as QuicheH3Connection, Event, NameValue}; use tokio::sync::{mpsc, Notify}; -use tokio::sync::mpsc::Receiver; +use tokio::sync::mpsc::{Receiver, Sender}; +use tokio::sync::mpsc::error::TrySendError; use crate::protocols::http::body_buffer::FixedBuffer; use crate::protocols::http::v3::{event_to_request_headers, response_headers_to_event}; use crate::protocols::http::v3::nohash::StreamIdHashMap; @@ -44,6 +46,7 @@ use crate::protocols::http::v3::nohash::StreamIdHashMap; static H3_OPTIONS: OnceLock = OnceLock::new(); const H3_SESSION_EVENTS_CHANNEL_SIZE : usize = 256; +const H3_SESSION_DROP_CHANNEL_SIZE : usize = 1024; /// Perform HTTP/3 connection handshake with an established (QUIC) connection. /// @@ -54,7 +57,7 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

) -> Result

) -> Result

, rx_notify: Arc, - sessions: StreamIdHashMap> + sessions: StreamIdHashMap>, + drop_sessions: (Sender, Receiver) } impl H3Connection { @@ -117,6 +121,9 @@ pub struct HttpSession { quic_connection: Arc>, h3_connection: Arc>, + // notify during drop to remove event_tx from active sessions + drop_session: Sender, + // trigger Quic send, continue ConnectionTx write loop tx_notify: Arc, // receive notification on Quic recv, used to check stream capacity @@ -146,6 +153,21 @@ pub struct HttpSession { digest: Arc, } +impl Drop for HttpSession { + fn drop(&mut self) { + match self.drop_session.try_send(self.stream_id) { + Ok(()) => debug!("drop stream {}", self.stream_id), + Err(e) => { + let id = match e { + TrySendError::Full(id) => id, + TrySendError::Closed(id) => id + }; + warn!("stream {} failed notify drop session", id) + } + } + } +} + #[allow(unused)] // TODO: remove impl HttpSession { /// Create a new [`HttpSession`] from the QUIC connection. @@ -177,7 +199,7 @@ impl HttpSession { Ok((stream_id, ev)) => { if let Some(channel) = conn.sessions.get(&stream_id) { debug!( - "HTTP3 conn_id={} stream_id={} forward event={:?}", + "H3 connection {} stream {} forward event={:?}", conn.id, stream_id, ev ); channel.send(ev).await @@ -186,7 +208,7 @@ impl HttpSession { |e| format!("failed to send on event channel with {}", e))?; } else { debug!( - "HTTP3 conn_id={} stream_id={} received event {:?}", + "H3 connection {} stream {} received event {:?}", conn.id, stream_id, &ev ); match ev { @@ -202,7 +224,7 @@ impl HttpSession { }, Event::Headers { list, more_frames: stream_continues } => { trace!( - "HTTP3 conn_id={} request headers: {:?}, more_frames: {:?}", + "H3 connection {} request headers={:?}, more_frames={:?}", conn.id, &list, &stream_continues @@ -216,6 +238,8 @@ impl HttpSession { quic_connection: conn.quic_connection.clone(), h3_connection: conn.h3_connection.clone(), + drop_session: conn.drop_sessions.0.clone(), + tx_notify: conn.tx_notify.clone(), rx_notify: 
conn.rx_notify.clone(), event_rx, @@ -233,44 +257,40 @@ impl HttpSession { }; if let Some(_) = conn.sessions.insert(stream_id, event_tx) { - debug_assert!(false, "existing session is not allowed. {stream_id}") + debug_assert!(false, "H3 connection {} stream {} existing session is not allowed", conn.id, stream_id) }; return Ok(Some(session)); } } } } - Err(quiche::h3::Error::Done) => { - debug!("HTTP3 conn_id={} no events available", conn.id); + Err(h3::Error::Done) => { + debug!("H3 connection {} no events available", conn.id); // TODO: in case PriorityUpdate was triggered take_priority_update should be called here - let is_active; let timeout; { let mut qconn = conn.quic_connection.lock(); - if qconn.is_closed() { + if qconn.is_closed() || + !(qconn.is_established() || qconn.is_in_early_data()) { + warn!("open sessions: {:?}", conn.sessions.keys()); return Ok(None) } - is_active = qconn.is_established() && !qconn.is_in_early_data(); timeout = qconn.timeout(); } - if is_active { - debug!("Quic connection {:?} is still active. Timeout: {:?}", conn.id, timeout); - if let Some(timeout) = timeout { - // race for new data on connection or timeout - tokio::select! { - _timeout = tokio::time::sleep(timeout) => { - let mut qconn = conn.quic_connection.lock(); - qconn.on_timeout(); - } - _data = conn.rx_notify.notified() => {} + debug!("Quic connection {:?} is still active. Timeout: {:?}", conn.id, timeout); + if let Some(timeout) = timeout { + // race for new data on connection or timeout + tokio::select! 
{ + _timeout = tokio::time::sleep(timeout) => { + let mut qconn = conn.quic_connection.lock(); + qconn.on_timeout(); } - }; - - continue 'poll - } + _data = conn.rx_notify.notified() => {} + } + }; - debug!("HTTP3 conn_id={} waiting for data", conn.id); + debug!("H3 connection {} waiting for data", conn.id); continue 'poll; } Err(err) => { @@ -280,6 +300,12 @@ impl HttpSession { }) } } + + while !conn.drop_sessions.1.is_empty() { + if let Some(stream_id) = conn.drop_sessions.1.recv().await { + conn.sessions.remove(&stream_id); + } + } } } @@ -335,7 +361,7 @@ impl HttpSession { let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); debug!( - "HTTP3 conn_id={} stream_id={} receiving body", + "H3 connection {} stream {} receiving body", qconn.trace_id(), self.stream_id ); @@ -356,10 +382,10 @@ impl HttpSession { ) -> Result<()> { if self.send_ended { // TODO: error or warn? - warn!("Http session already ended."); + warn!("H3 session already ended"); return Ok(()); } else if self.response_header_written.as_ref().is_some() { - warn!("Response header is already sent, cannot send again"); + warn!("response header is already sent, cannot send again"); return Ok(()); } @@ -386,17 +412,6 @@ impl HttpSession { let headers = response_headers_to_event(&header); let sent = self.send_response(headers.as_slice(), end).await; - match sent { - Ok(()) => { - self.tx_notify.notify_one(); - } - Err(h3::Error::Done) => {}, - Err(e) => return Err(e) - .explain_err( - ErrorType::WriteError, - |e| format!("failed to write response to http3 connection with {}", e)), - } - self.response_header_written = Some(header); self.send_ended = self.send_ended || end; Ok(()) @@ -406,20 +421,38 @@ impl HttpSession { &self, headers: &[T], fin: bool, - ) -> h3::Result<()> { + ) -> Result<()> { + let headers_len = headers + .iter() + .fold(0, |acc, h| acc + h.value().len() + h.name().len() + 32); + + let capacity = self.stream_capacity(headers_len).await + .explain_err( + 
ErrorType::WriteError, + |_| format!("H3 connection {} failed to acquire capacity for stream {}", + self.connection_id, self.stream_id))?; + let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); - // TODO: use qconn.stream_capacity(stream_id) or qconn.stream_writeable(stream_id) - // eventually retry in case send_response returns a StreamBlocked error debug!( - "HTTP3 conn_id={} stream_id={} sending response headers={:?}, finished={}", + "H3 connection {} stream {} sending response headers={:?}, finished={}", qconn.trace_id(), self.stream_id, headers, fin ); - hconn.send_response(&mut qconn, self.stream_id, headers, fin) + + match hconn.send_response(&mut qconn, self.stream_id, headers, fin) { + Ok(()) => { + self.tx_notify.notify_one(); + Ok(()) + } + Err(h3::Error::Done) => { Ok(()) }, + Err(e) => Err(e).explain_err( + ErrorType::WriteError, + |_| "H3 connection failed to write response"), + } } /// Write response body to the client. See [Self::write_response_header] for how to use `end`. @@ -432,13 +465,14 @@ impl HttpSession { } else if self.response_header_written.is_none() { return Err(Error::explain( ErrorType::H3Error, - "Trying to send the body before header being sent.", + "trying to send the body before header being sent", )); }; let mut sent_len = 0; while sent_len < data.len() { - let capacity = self.stream_capacity().await + let required = cmp::min(data.len(), MAX_IPV6_QUIC_DATAGRAM_SIZE); + let capacity = self.stream_capacity(required).await .explain_err( ErrorType::WriteError, |e| format!("Failed to acquire capacity on stream id {} with {}", self.stream_id, e))?; @@ -455,12 +489,8 @@ impl HttpSession { sent_len += sent_size; self.tx_notify.notify_one(); }, - Err(_e) => { - return Err(Error::explain( - ErrorType::WriteError, - format!("Writing h3 response body to downstream failed. 
{}", _e), - )) - } + Err(e) => return Err(e).explain_err( + ErrorType::WriteError, |_| "writing h3 response body to downstream") } } @@ -473,24 +503,24 @@ impl HttpSession { let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); - debug!("HTTP3 conn_id={} stream_id={} sending response body with length={:?}, finished={}", + debug!("H3 connection {} stream {} sending response body with length={:?}, finished={}", self.connection_id, self.stream_id, body.len(), fin); hconn.send_body(&mut qconn, self.stream_id, body, fin) } - async fn stream_capacity(&self) -> quiche::Result { + async fn stream_capacity(&self, required: usize) -> quiche::Result { let capacity; { let qconn = self.quic_connection.lock(); capacity = qconn.stream_capacity(self.stream_id)?; } - if capacity > 0 { + if capacity >= required { Ok(capacity) } else { self.rx_notify.notified().await; - Box::pin(self.stream_capacity()).await + Box::pin(self.stream_capacity(required)).await } } @@ -564,7 +594,7 @@ impl HttpSession { } None => return Err(Error::explain( ErrorType::ReadError, - "HTTP3 Session event channel disconnected.")), + "H3 session event channel disconnected")), } } } From 63d1ff96c3694859be28c9a3d596f08d829e7013 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Mon, 6 Jan 2025 14:47:07 +0100 Subject: [PATCH 10/52] H3 connection shutdown, goaway & H3 session trailers, idle --- pingora-core/src/apps/mod.rs | 7 +- pingora-core/src/protocols/http/server.rs | 5 +- pingora-core/src/protocols/http/v1/client.rs | 1 + pingora-core/src/protocols/http/v3/mod.rs | 22 +- pingora-core/src/protocols/http/v3/server.rs | 373 ++++++++++++++----- 5 files changed, 297 insertions(+), 111 deletions(-) diff --git a/pingora-core/src/apps/mod.rs b/pingora-core/src/apps/mod.rs index 344dd72eb..0fc69d214 100644 --- a/pingora-core/src/apps/mod.rs +++ b/pingora-core/src/apps/mod.rs @@ -241,9 +241,10 @@ where // TODO: add a timeout? let h3_stream = tokio::select! 
{ _ = shutdown.changed() => { - h3_conn.graceful_shutdown().await; - let _ = poll_fn(|cx| h3_conn.poll_closed(cx)) - .await.map_err(|e| error!("H3 error waiting for shutdown {e}")); + match h3_conn.graceful_shutdown().await { + Ok(()) => {} + Err(e) => { error!("H3 error waiting for shutdown {e}") } + }; return None; } h3_stream = h3_server::HttpSession::from_h3_conn(&mut h3_conn, digest.clone()) => h3_stream diff --git a/pingora-core/src/protocols/http/server.rs b/pingora-core/src/protocols/http/server.rs index 677d8c96d..869dee2f4 100644 --- a/pingora-core/src/protocols/http/server.rs +++ b/pingora-core/src/protocols/http/server.rs @@ -183,13 +183,14 @@ impl Session { match self { Self::H1(_) => Ok(()), // TODO: support trailers for h1 Self::H2(s) => s.write_trailers(trailers), - Self::H3(s) => s.write_trailers(trailers), + Self::H3(s) => s.write_trailers(trailers).await, } } /// Finish the life of this request. /// For H1, if connection reuse is supported, a Some(Stream) will be returned, otherwise None. /// For H2, always return None because H2 stream is not reusable. + /// for H3, this will send a FIN_STREAM frame on the underlying QUIC stream pub async fn finish(self) -> Result> { match self { Self::H1(mut s) => { @@ -294,7 +295,7 @@ impl Session { /// Give up the http session abruptly. 
/// For H1 this will close the underlying connection /// For H2 this will send a RESET frame to end this stream - /// For H3 this will send a RESET_STREAM QUIC frame on the underlying QUIC stream + /// For H3 this will send a STOP_SENDING & RESET_STREAM QUIC frame on the underlying stream /// For H2 & H3 a call has no impact on the connection pub async fn shutdown(&mut self) { match self { diff --git a/pingora-core/src/protocols/http/v1/client.rs b/pingora-core/src/protocols/http/v1/client.rs index 29198b5a7..7f5c5e1be 100644 --- a/pingora-core/src/protocols/http/v1/client.rs +++ b/pingora-core/src/protocols/http/v1/client.rs @@ -710,6 +710,7 @@ pub(crate) fn http_req_header_to_wire(req: &RequestHeader) -> Option { Version::HTTP_10 => "HTTP/1.0", Version::HTTP_11 => "HTTP/1.1", Version::HTTP_2 => "HTTP/2", + Version::HTTP_3 => "HTTP/3", _ => { return None; /*TODO: unsupported version */ } diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index 237f1d248..bcbb612e2 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -14,15 +14,17 @@ //! HTTP/3 implementation +use std::fmt::Debug; use http::{HeaderMap, HeaderName, HeaderValue, Request, Uri, Version}; use log::warn; use quiche::h3::{Header, NameValue}; use pingora_http::{RequestHeader, ResponseHeader}; +use pingora_error::{ErrorType, OrErr, Result}; pub mod server; pub mod nohash; -pub fn event_to_request_headers(list: &Vec
) -> RequestHeader { +pub fn event_to_request_headers(list: &Vec
) -> Result { let (mut parts, _) = Request::new(()).into_parts(); let mut uri = Uri::builder(); let mut headers = HeaderMap::new(); @@ -57,9 +59,10 @@ pub fn event_to_request_headers(list: &Vec
) -> RequestHeader { } parts.version = Version::HTTP_3; - parts.uri = uri.build().unwrap(); // TODO: use result + parts.uri = uri.build() + .explain_err(ErrorType::H3Error, |_| "failed to convert event parts to request uri")?; parts.headers = headers; - parts.into() + Ok(parts.into()) } fn response_headers_to_event(resp: &ResponseHeader) -> Vec
{ @@ -70,4 +73,17 @@ fn response_headers_to_event(resp: &ResponseHeader) -> Vec
{ qheaders.push(Header::new(k.as_str().as_bytes(), v.as_bytes())) } qheaders +} + +fn headermap_to_headervec(headers: &HeaderMap) -> Vec
{ + headers + .iter() + .map(|(k, v)| Header::new(k.as_str().as_bytes(), v.as_bytes())) + .collect() +} + +fn header_size(headers: &[T]) -> usize { + headers + .iter() + .fold(0, |acc, h| acc + h.value().len() + h.name().len() + 32) } \ No newline at end of file diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index ffec8fa2e..f3ffda5d2 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -21,10 +21,8 @@ use bytes::{BufMut, Bytes, BytesMut}; use http::uri::PathAndQuery; use http::{header, HeaderMap, HeaderName}; use pingora_error::{Error, ErrorType, OrErr, Result}; -use std::future::Future; -use std::pin::Pin; use std::sync::{Arc, OnceLock}; -use std::task::{Context, Poll}; +use std::time::Duration; use log::{debug, error, info, trace, warn}; use parking_lot::Mutex; use crate::protocols::http::v1::client::http_req_header_to_wire; @@ -34,19 +32,21 @@ use crate::protocols::http::date::get_cached_date; use crate::protocols::http::HttpTask; pub use quiche::h3::Config as H3Options; use crate::protocols::l4::quic::{Connection, MAX_IPV6_QUIC_DATAGRAM_SIZE}; -use quiche::{h3, Connection as QuicheConnection}; +use quiche::{h3, Connection as QuicheConnection, Shutdown}; use quiche::h3::{Connection as QuicheH3Connection, Event, NameValue}; use tokio::sync::{mpsc, Notify}; use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::mpsc::error::TrySendError; use crate::protocols::http::body_buffer::FixedBuffer; -use crate::protocols::http::v3::{event_to_request_headers, response_headers_to_event}; +use crate::protocols::http::v3::{event_to_request_headers, header_size, headermap_to_headervec, response_headers_to_event}; use crate::protocols::http::v3::nohash::StreamIdHashMap; static H3_OPTIONS: OnceLock = OnceLock::new(); const H3_SESSION_EVENTS_CHANNEL_SIZE : usize = 256; const H3_SESSION_DROP_CHANNEL_SIZE : usize = 1024; +const BODY_BUF_LIMIT: usize = 1024 * 64; 
+const SHUTDOWN_GOAWAY_DRAIN_TIMEOUT: Duration = Duration::from_secs(60); /// Perform HTTP/3 connection handshake with an established (QUIC) connection. /// @@ -82,19 +82,21 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

>, h3_connection: Arc>, @@ -102,15 +104,52 @@ pub struct H3Connection { rx_notify: Arc, sessions: StreamIdHashMap>, - drop_sessions: (Sender, Receiver) + drop_sessions: (Sender, Receiver), + + max_accepted_stream_id: u64, + received_goaway: Option } impl H3Connection { - pub async fn graceful_shutdown(&mut self) { - todo!(); - } - pub fn poll_closed(&mut self, _cx: &mut Context<'_>) -> Poll> { - todo!(); + pub async fn graceful_shutdown(&mut self) -> Result<()> { + // send GOAWAY frame + { + let mut qconn = self.quic_connection.lock(); + let mut hconn = self.h3_connection.lock(); + + debug!("H3 connection {} sending GoAway", self.connection_id); + hconn.send_goaway(&mut qconn,self.max_accepted_stream_id) + .explain_err(ErrorType::H3Error, |_| "failed to send graceful shutdown")?; + self.tx_notify.notify_one(); + } + + let drain = async { + while !self.sessions.is_empty() { + self.rx_notify.notified().await + } + }; + + // wait for open sessions to drain + let mut is_timeout = false; + tokio::select! 
{ + _successful_drain = drain => { debug!("h3 successfully drained active sessions") } + _timeout = tokio::time::sleep(SHUTDOWN_GOAWAY_DRAIN_TIMEOUT) => { is_timeout = true } + } + + // close quic connection + { + let mut qconn = self.quic_connection.lock(); + qconn.close(false, 0x00, b"graceful shutdown") + .explain_err(ErrorType::H3Error, |_| "failed to close quic connection")?; + self.tx_notify.notify_one(); + } + + if is_timeout { + Err(Error::explain( + ErrorType::InternalError, "h3 session draining timed out with active sessions")) + } else { + Ok(()) + } } } @@ -133,7 +172,9 @@ pub struct HttpSession { // HTTP3 event channel for this stream_id event_rx: Receiver, - request_header: Option, + request_header: RequestHeader, + // required as separate field for has_body + request_has_body: bool, read_ended: bool, body_read: usize, // buffered request body for retry logic @@ -168,7 +209,6 @@ impl Drop for HttpSession { } } -#[allow(unused)] // TODO: remove impl HttpSession { /// Create a new [`HttpSession`] from the QUIC connection. 
/// This function returns a new HTTP/3 session when the provided HTTP/3 connection, `conn`, @@ -197,10 +237,17 @@ impl HttpSession { match poll { Ok((stream_id, ev)) => { + if let Some(goaway_id) = conn.received_goaway { + // do not accept new streams, continue processing existing streams + if stream_id >= goaway_id { + continue 'poll; + } + } + if let Some(channel) = conn.sessions.get(&stream_id) { debug!( "H3 connection {} stream {} forward event={:?}", - conn.id, stream_id, ev + conn.connection_id, stream_id, ev ); channel.send(ev).await .explain_err( @@ -209,7 +256,7 @@ impl HttpSession { } else { debug!( "H3 connection {} stream {} received event {:?}", - conn.id, stream_id, &ev + conn.connection_id, stream_id, &ev ); match ev { Event::Data @@ -219,20 +266,22 @@ impl HttpSession { debug_assert!(false, "event type requires corresponding session") } Event::GoAway => { - info!("Received GoAway, dropping connection."); - return Ok(None) + info!("stream_id {} received GoAway", stream_id); + conn.received_goaway = Some(stream_id); }, - Event::Headers { list, more_frames: stream_continues } => { + Event::Headers { list, more_frames } => { trace!( "H3 connection {} request headers={:?}, more_frames={:?}", - conn.id, + conn.connection_id, &list, - &stream_continues + &more_frames ); - let (event_tx, event_rx) = mpsc::channel(H3_SESSION_EVENTS_CHANNEL_SIZE); + let (event_tx, event_rx) = + mpsc::channel(H3_SESSION_EVENTS_CHANNEL_SIZE); + let session = HttpSession { - connection_id: conn.id.clone(), + connection_id: conn.connection_id.clone(), stream_id, quic_connection: conn.quic_connection.clone(), @@ -244,8 +293,9 @@ impl HttpSession { rx_notify: conn.rx_notify.clone(), event_rx, - read_ended: !stream_continues, - request_header: Some(event_to_request_headers(&list)), + request_header: event_to_request_headers(&list)?, + request_has_body: more_frames, + read_ended: !more_frames, body_read: 0, body_retry_buffer: None, @@ -257,28 +307,34 @@ impl HttpSession { }; if let 
Some(_) = conn.sessions.insert(stream_id, event_tx) { - debug_assert!(false, "H3 connection {} stream {} existing session is not allowed", conn.id, stream_id) + debug_assert!(false, "H3 connection {} stream {} existing \ + session is not allowed", conn.connection_id, stream_id) }; + + conn.max_accepted_stream_id = session.stream_id; return Ok(Some(session)); } } } } Err(h3::Error::Done) => { - debug!("H3 connection {} no events available", conn.id); - // TODO: in case PriorityUpdate was triggered take_priority_update should be called here + debug!("H3 connection {} no events available", conn.connection_id); + // TODO: in case PriorityUpdate was triggered call take_priority_update() here let timeout; { - let mut qconn = conn.quic_connection.lock(); + let qconn = conn.quic_connection.lock(); if qconn.is_closed() || !(qconn.is_established() || qconn.is_in_early_data()) { - warn!("open sessions: {:?}", conn.sessions.keys()); + if conn.sessions.len() > 0 { + warn!("h3 connection {} closed with open {} sessions", + conn.connection_id, conn.sessions.len()); + } return Ok(None) } timeout = qconn.timeout(); } - debug!("Quic connection {:?} is still active. Timeout: {:?}", conn.id, timeout); + debug!("Quic connection {:?} is still active. Timeout: {:?}", conn.connection_id, timeout); if let Some(timeout) = timeout { // race for new data on connection or timeout tokio::select! { @@ -290,14 +346,19 @@ impl HttpSession { } }; - debug!("H3 connection {} waiting for data", conn.id); + debug!("H3 connection {} waiting for data", conn.connection_id); continue 'poll; } - Err(err) => { - info!("Received error, dropping connection. {:?}", err); - return Err(err).explain_err(ErrorType::H3Error, |e| { - format!("While accepting new downstream requests. Error: {e}") - }) + Err(e) => { + // If an error occurs while processing data, the connection is closed with + // the appropriate error code, using the transport’s close() method. 
+ + // send the close() event + conn.tx_notify.notify_one(); + + error!("H3 connection closed with error {:?}.", e); + return Err(e).explain_err( + ErrorType::H3Error, |_| "while accepting new downstream requests") } } @@ -314,7 +375,7 @@ impl HttpSession { /// Different from its HTTP/1.X counterpart, this function never panics as the request is already /// read when established a new HTTP/3 stream. pub fn req_header(&self) -> &RequestHeader { - self.request_header.as_ref().unwrap() + &self.request_header } /// A mutable reference to request sent from the client @@ -322,7 +383,7 @@ impl HttpSession { /// Different from its HTTP/1.X counterpart, this function never panics as the request is already /// read when established a new HTTP/3 stream. pub fn req_header_mut(&mut self) -> &mut RequestHeader { - self.request_header.as_mut().unwrap() + &mut self.request_header } /// Read request body bytes. `None` when there is no more body to read. @@ -336,7 +397,7 @@ impl HttpSession { let size = match self.recv_body(&mut buf) { Ok(size) => size, Err(h3::Error::Done) => { - error!("recv_body: Done"); + trace!("recv_body done"); return Ok(Some(BytesMut::with_capacity(0).into())) }, Err(e) => return Err(Error::explain( @@ -357,7 +418,7 @@ impl HttpSession { } - fn recv_body(&self, out: &mut [u8]) -> h3::Result<(usize)> { + fn recv_body(&self, out: &mut [u8]) -> h3::Result { let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); debug!( @@ -381,7 +442,6 @@ impl HttpSession { end: bool, ) -> Result<()> { if self.send_ended { - // TODO: error or warn? warn!("H3 session already ended"); return Ok(()); } else if self.response_header_written.as_ref().is_some() { @@ -389,18 +449,17 @@ impl HttpSession { return Ok(()); } - /* TODO: check if should that be as well handled like that? 
if header.status.is_informational() { - // ignore informational response 1xx header because send_response() can only be called once + // ignore informational response 1xx header + // send_response() can only be called once in case end = true // https://github.com/hyperium/h2/issues/167 debug!("ignoring informational headers"); return Ok(()); - } */ + } /* update headers */ header.insert_header(header::DATE, get_cached_date())?; - // TODO: check if this is correct for H3 // remove other h1 hop headers that cannot be present in H3 // https://httpwg.org/specs/rfc7540.html#n-connection-specific-header-fields header.remove_header(&header::TRANSFER_ENCODING); @@ -410,7 +469,7 @@ impl HttpSession { header.remove_header(&HeaderName::from_static("proxy-connection")); let headers = response_headers_to_event(&header); - let sent = self.send_response(headers.as_slice(), end).await; + self.send_response(headers.as_slice(), end).await?; self.response_header_written = Some(header); self.send_ended = self.send_ended || end; @@ -422,11 +481,7 @@ impl HttpSession { headers: &[T], fin: bool, ) -> Result<()> { - let headers_len = headers - .iter() - .fold(0, |acc, h| acc + h.value().len() + h.name().len() + 32); - - let capacity = self.stream_capacity(headers_len).await + self.stream_capacity(header_size(headers)).await .explain_err( ErrorType::WriteError, |_| format!("H3 connection {} failed to acquire capacity for stream {}", @@ -450,8 +505,7 @@ impl HttpSession { } Err(h3::Error::Done) => { Ok(()) }, Err(e) => Err(e).explain_err( - ErrorType::WriteError, - |_| "H3 connection failed to write response"), + ErrorType::WriteError, |_| "H3 connection failed to write response"), } } @@ -525,9 +579,57 @@ impl HttpSession { } /// Write response trailers to the client, this also closes the stream. - pub fn write_trailers(&mut self, trailers: HeaderMap) -> Result<()> { - // TODO: use async fn? 
- todo!(); + pub async fn write_trailers(&mut self, trailers: HeaderMap) -> Result<()> { + if self.send_ended { + warn!("Tried to write trailers after end of stream, dropping them"); + return Ok(()); + } else if self.body_sent <= 0 { + return Err(Error::explain( + ErrorType::H3Error, "Trying to send trailers before body is sent.")); + }; + + let headers = headermap_to_headervec(&trailers); + self.send_additional_headers(self.stream_id, headers.as_slice(), true, true).await?; + + // sending trailers closes the stream + self.send_ended = true; + Ok(()) + } + + // NOTE: as of quiche/v0.22.0 only available in quiche.git/master + async fn send_additional_headers( + &self, + stream_id: u64, + headers: &[T], + is_trailer: bool, + fin: bool, + ) -> Result<()> { + self.stream_capacity(header_size(headers)).await + .explain_err( + ErrorType::WriteError, + |_| format!("H3 connection {} failed to acquire capacity for stream {}", + self.connection_id, self.stream_id))?; + + let mut qconn = self.quic_connection.lock(); + let mut hconn = self.h3_connection.lock(); + + debug!( + "H3 connection {} stream {} sending additional headers={:?}, is_trailer={:?} finished={}", + qconn.trace_id(), + self.stream_id, + headers, + is_trailer, + fin + ); + + match hconn.send_additional_headers(&mut qconn, stream_id, headers, is_trailer, fin) { + Ok(()) => { + self.tx_notify.notify_one(); + Ok(()) + } + Err(e) => Err(e).explain_err( + ErrorType::WriteError, |_| "H3 connection failed to write h3 trailers to downstream"), + } } /// Similar to [Self::write_response_header], this function takes a reference instead @@ -546,24 +648,21 @@ impl HttpSession { /// Dropping this object without sending `end` will cause an error to the client, which will cause /// the client to treat this session as bad or incomplete. 
pub async fn finish(&mut self) -> Result<()> { - // TODO: check/validate with documentation on protocols::http::server::HttpSession - // TODO: check/validate trailer sending if self.send_ended { // already ended the stream return Ok(()); } - // use an empty data frame to signal the end - self.send_body(&[], true) - .await - .explain_err( - ErrorType::WriteError, - |e| format! {"Writing h3 response body to downstream failed. {e}"}, - )?; - - self.send_ended = true; + if self.response_header_written.is_some() { + // use an empty data frame to signal the end + self.send_body(&[], true).await + .explain_err( + ErrorType::WriteError, + |e| format! {"Writing h3 response body to downstream failed. {e}"}, + )?; + self.send_ended = true; + } // else: the response header is not sent, do nothing now. - // When send_response_body is dropped, an RST_STREAM will be sent Ok(()) } @@ -584,10 +683,29 @@ impl HttpSession { Event::Data => { return Ok(()) } - // TODO: handle events correctly - Event::Reset(_) | - Event::PriorityUpdate | + Event::Reset(error_code) => { + return Err(Error::explain( + ErrorType::H3Error, + format!("stream was reset with error code {}", error_code))) + } + Event::PriorityUpdate => { + // TODO: this step should be deferred until + // h3::Connection::poll() returns Error::Done + // see also h3::Connection::send_response_with_priority() + + /* + // https://datatracker.ietf.org/doc/rfc9218/ + let mut hconn = self.h3_connection.lock(); + // field value has the same content as the header::Priority field + let field_value = hconn.take_last_priority_update(self.stream_id) + .explain_err(ErrorType::H3Error, "failed to receive priority update field value")?; + */ + warn!("received unhandled priority update"); + continue + } Event::GoAway => { + // RFC 9114 Section 5.2 & 7.2.6 + warn!("received unhandled go-away"); continue }, } @@ -599,6 +717,33 @@ impl HttpSession { } } + async fn reset_event(&mut self) -> Result { + loop { + match self.event_rx.recv().await { + 
Some(ev) => { + trace!("event {:?}", ev); + match ev { + Event::Data | + Event::Finished | + Event::GoAway | + Event::PriorityUpdate => { + continue + } + Event::Headers { .. } => { + continue + } + Event::Reset(error_code) => { + return Ok(error_code) + } + } + } + None => return Err(Error::explain( + ErrorType::ReadError, + "H3 session event channel disconnected")), + } + } + } + pub async fn response_duplex_vec(&mut self, tasks: Vec) -> Result { let mut end_stream = false; for task in tasks.into_iter() { @@ -619,7 +764,7 @@ impl HttpSession { None => end, }, HttpTask::Trailer(Some(trailers)) => { - self.write_trailers(*trailers)?; + self.write_trailers(*trailers).await?; true } HttpTask::Trailer(None) => true, @@ -666,13 +811,27 @@ impl HttpSession { /// /// This will send a `INTERNAL_ERROR` stream error to the client pub fn shutdown(&mut self) { - // TODO: check/validate with documentation on protocols::http::server::HttpSession - // TODO: should this set self.ended? it closes the stream which prevents further writes - todo!(); + if !self.read_ended { + self.stream_shutdown(Shutdown::Read, 2u64); + // sent STOP_SENDING frame & stream_recv() will no longer return data + self.read_ended = true; + } + if !self.send_ended { + self.stream_shutdown(Shutdown::Write, 2u64); + // sent RESET_STREAM & stream_send() data will be ignored + self.send_ended = true; + } } - // This is a hack for pingora-proxy to create subrequests from h2 server session - // TODO: be able to convert from h3 to h1 subrequest + fn stream_shutdown(&self, direction: Shutdown, error_code: u64) { + let mut qconn = self.quic_connection.lock(); + match qconn.stream_shutdown(self.stream_id, direction, error_code) { + Ok(()) => self.tx_notify.notify_one(), + Err(e) => warn!("h3 stream {} shutdown failed. 
{:?}", self.stream_id, e) + } + } + + // This is a hack for pingora-proxy to create subrequests from h3 server session pub fn pseudo_raw_h1_request_header(&self) -> Bytes { let buf = http_req_header_to_wire(self.req_header()).unwrap(); // safe, None only when version unknown buf.freeze() @@ -680,38 +839,61 @@ impl HttpSession { /// Whether there is no more body to read pub fn is_body_done(&self) -> bool { - todo!(); + self.is_body_empty() || self.read_ended } /// Whether there is any body to read. pub fn is_body_empty(&self) -> bool { - todo!(); + self.request_has_body || self + .request_header + .headers + .get(header::CONTENT_LENGTH) + .map_or(false, |cl| cl.as_bytes() == b"0") } pub fn retry_buffer_truncated(&self) -> bool { - todo!(); + self.body_retry_buffer + .as_ref() + .map_or_else(|| false, |r| r.is_truncated()) } pub fn enable_retry_buffering(&mut self) { - todo!(); + if self.body_retry_buffer.is_none() { + self.body_retry_buffer = Some(FixedBuffer::new(BODY_BUF_LIMIT)) + } } pub fn get_retry_buffer(&self) -> Option { - todo!(); + self.body_retry_buffer.as_ref().and_then(|b| { + if b.is_truncated() { + None + } else { + b.get_buffer() + } + }) } /// `async fn idle() -> Result;` /// This async fn will be pending forever until the client closes the stream/connection /// This function is used for watching client status so that the server is able to cancel /// its internal tasks as the client waiting for the tasks goes away - pub fn idle(&mut self) -> Idle { - Idle(self) + pub async fn idle(&mut self) -> Result<()> { + match self.reset_event().await { + Ok(_error_code) => { Ok(()) } + Err(e) => Err(e) + } } /// Similar to `read_body_bytes()` but will be pending after Ok(None) is returned, /// until the client closes the connection pub async fn read_body_or_idle(&mut self, no_body_expected: bool) -> Result> { - todo!(); + if no_body_expected || self.is_body_done() { + let reason = self.reset_event().await?; + Error::e_explain(ErrorType::H3Error, + 
format!("Client closed H3, reason: {reason}")) + } else { + self.read_body_bytes().await + } } /// Return how many response body bytes (application, not wire) already sent downstream @@ -721,7 +903,7 @@ impl HttpSession { /// Return how many request body bytes (application, not wire) already read from downstream pub fn body_bytes_read(&self) -> usize { - todo!(); + self.body_read } /// Return the [Digest] of the connection. @@ -743,19 +925,4 @@ impl HttpSession { pub fn client_addr(&self) -> Option<&SocketAddr> { self.digest.socket_digest.as_ref().map(|d| d.peer_addr())? } -} - -/// The future to poll for an idle session. -/// -/// Calling `.await` in this object will not return until the client decides to close this stream. -#[allow(unused)] // TODO: remove -pub struct Idle<'a>(&'a mut HttpSession); - -#[allow(unused)] // TODO: remove -impl<'a> Future for Idle<'a> { - type Output = u64; - - fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - todo!(); - } } \ No newline at end of file From 20bacd44a770c98ab79d281bc0779e3c83bd8575 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Wed, 8 Jan 2025 08:26:48 +0100 Subject: [PATCH 11/52] IO enhancements & handshake fixes --- pingora-core/src/apps/http_app.rs | 13 +- pingora-core/src/protocols/http/v3/mod.rs | 24 +-- pingora-core/src/protocols/http/v3/server.rs | 172 ++++++++++++------ pingora-core/src/protocols/l4/quic/mod.rs | 93 +++++++--- .../src/protocols/l4/quic/settings.rs | 2 +- .../src/protocols/l4/quic/tls_handshake.rs | 51 +++--- 6 files changed, 231 insertions(+), 124 deletions(-) diff --git a/pingora-core/src/apps/http_app.rs b/pingora-core/src/apps/http_app.rs index 91ca58ae1..b59dfedcf 100644 --- a/pingora-core/src/apps/http_app.rs +++ b/pingora-core/src/apps/http_app.rs @@ -67,7 +67,7 @@ where return None; } } - trace!("{:?}", http.req_header()); + //trace!("{:?}", http.req_header()); if *shutdown.borrow() { http.set_keepalive(None); } else { @@ -97,13 +97,10 @@ where ), } } - match 
http.finish().await { - Ok(c) => c, - Err(e) => { - error!("HTTP server fails to finish the request: {e}"); - None - } - } + http.finish().await.unwrap_or_else(|e| { + error!("HTTP server fails to finish the request: {e}"); + None + }) } } diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index bcbb612e2..1b64b204b 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -36,24 +36,16 @@ pub fn event_to_request_headers(list: &Vec
) -> Result { b":path" => uri = uri.path_and_query(h.value()), b":method" => match h.value().try_into() { Ok(v) => parts.method = v, - Err(_) => { - warn!("Failed to parse method from input: {:?}", h.value()) - } + Err(_) => warn!("Failed to parse method from input: {:?}", h.value()) }, - _ => { - match HeaderName::from_bytes(h.name()) { - Ok(k) => match HeaderValue::from_bytes(h.value()) { - Ok(v) => { - headers.append(k, v); - } - Err(_) => { - warn!("Failed to parse header value from input: {:?}", h.value()) - } + _ => match HeaderName::from_bytes(h.name()) { + Ok(k) => match HeaderValue::from_bytes(h.value()) { + Ok(v) => { + headers.append(k, v); }, - Err(_) => { - warn!("Failed to parse header name input: {:?}", h.name()) - } - }; + Err(_) => warn!("Failed to parse header value from input: {:?}", h.value()), + }, + Err(_) => warn!("Failed to parse header name input: {:?}", h.name()), } } } diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index f3ffda5d2..49a27d9bc 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -32,7 +32,7 @@ use crate::protocols::http::date::get_cached_date; use crate::protocols::http::HttpTask; pub use quiche::h3::Config as H3Options; use crate::protocols::l4::quic::{Connection, MAX_IPV6_QUIC_DATAGRAM_SIZE}; -use quiche::{h3, Connection as QuicheConnection, Shutdown}; +use quiche::{h3, Connection as QuicheConnection, ConnectionId, Shutdown}; use quiche::h3::{Connection as QuicheH3Connection, Event, NameValue}; use tokio::sync::{mpsc, Notify}; use tokio::sync::mpsc::{Receiver, Sender}; @@ -60,56 +60,83 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

{ return Err(Error::explain( ErrorType::InternalError, "connection needs to be established, invalid state")) } Connection::Established(state) => { - let conn_id; let hconn = { let mut qconn = state.connection.lock(); - conn_id = qconn.trace_id().to_string(); h3::Connection::with_transport(&mut qconn, &options).explain_err( ErrorType::ConnectError, |_| "failed to create H3 connection")? }; - (conn_id, state.connection.clone(), hconn, state.tx_notify.clone(), state.rx_notify.clone()) + state.tx_notify.notify_waiters(); + state.tx_flushed.notified().await; + + (state.connection_id.clone(), state.connection.clone(), state.drop_connection.clone(), hconn, + state.tx_notify.clone(), state.tx_flushed.clone(), state.rx_notify.clone()) } }; let drop_sessions = mpsc::channel(H3_SESSION_DROP_CHANNEL_SIZE); Ok(H3Connection { _l4stream: io, - connection_id: conn_id.to_string(), + connection_id: conn_id, + drop_quic_connection: drop_qconn, + quic_connection: qconn, h3_connection: Arc::new(Mutex::new(hconn)), + tx_notify, + tx_flushed, rx_notify, sessions: Default::default(), drop_sessions, max_accepted_stream_id: 0, received_goaway: None, + requests_handled: 0 }) } pub struct H3Connection { _l4stream: Stream, // ensure the stream will not be dropped until all sessions are - connection_id: String, + connection_id: ConnectionId<'static>, + drop_quic_connection: Sender>, + quic_connection: Arc>, h3_connection: Arc>, tx_notify: Arc, + tx_flushed: Arc, rx_notify: Arc, sessions: StreamIdHashMap>, drop_sessions: (Sender, Receiver), max_accepted_stream_id: u64, - received_goaway: Option + received_goaway: Option, + requests_handled: u64 +} + +impl Drop for H3Connection { + fn drop(&mut self) { + match self.drop_quic_connection.try_send(self.connection_id.clone()) { + Ok(()) => debug!("drop connection {:?}", self.connection_id), + Err(e) => { + let conn_id = match e { + TrySendError::Full(id) => id, + TrySendError::Closed(id) => id + }; + warn!("failed send drop connection {:?} request", 
conn_id) + } + } + } } + impl H3Connection { pub async fn graceful_shutdown(&mut self) -> Result<()> { // send GOAWAY frame @@ -117,10 +144,10 @@ impl H3Connection { let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); - debug!("H3 connection {} sending GoAway", self.connection_id); - hconn.send_goaway(&mut qconn,self.max_accepted_stream_id) + debug!("H3 connection {:?} sending GoAway", self.connection_id); + hconn.send_goaway(&mut qconn, self.max_accepted_stream_id) .explain_err(ErrorType::H3Error, |_| "failed to send graceful shutdown")?; - self.tx_notify.notify_one(); + self.tx_notify.notify_waiters(); } let drain = async { @@ -141,7 +168,7 @@ impl H3Connection { let mut qconn = self.quic_connection.lock(); qconn.close(false, 0x00, b"graceful shutdown") .explain_err(ErrorType::H3Error, |_| "failed to close quic connection")?; - self.tx_notify.notify_one(); + self.tx_notify.notify_waiters(); } if is_timeout { @@ -155,8 +182,8 @@ impl H3Connection { /// HTTP/3 server session pub struct HttpSession { - connection_id: String, - stream_id: u64, + pub(crate) connection_id: ConnectionId<'static>, + pub(crate) stream_id: u64, quic_connection: Arc>, h3_connection: Arc>, @@ -197,13 +224,13 @@ pub struct HttpSession { impl Drop for HttpSession { fn drop(&mut self) { match self.drop_session.try_send(self.stream_id) { - Ok(()) => debug!("drop stream {}", self.stream_id), + Ok(()) => debug!("H3 connection {:?} drop stream {}", self.connection_id, self.stream_id), Err(e) => { let id = match e { TrySendError::Full(id) => id, TrySendError::Closed(id) => id }; - warn!("stream {} failed notify drop session", id) + warn!("H3 connection {:?} stream {} failed notify drop session", self.connection_id, id) } } } @@ -246,7 +273,7 @@ impl HttpSession { if let Some(channel) = conn.sessions.get(&stream_id) { debug!( - "H3 connection {} stream {} forward event={:?}", + "H3 connection {:?} stream {} forward event={:?}", conn.connection_id, stream_id, ev ); 
channel.send(ev).await @@ -255,7 +282,7 @@ impl HttpSession { |e| format!("failed to send on event channel with {}", e))?; } else { debug!( - "H3 connection {} stream {} received event {:?}", + "H3 connection {:?} stream {} received event {:?}", conn.connection_id, stream_id, &ev ); match ev { @@ -271,7 +298,7 @@ impl HttpSession { }, Event::Headers { list, more_frames } => { trace!( - "H3 connection {} request headers={:?}, more_frames={:?}", + "H3 connection {:?} request headers={:?}, more_frames={:?}", conn.connection_id, &list, &more_frames @@ -307,7 +334,7 @@ impl HttpSession { }; if let Some(_) = conn.sessions.insert(stream_id, event_tx) { - debug_assert!(false, "H3 connection {} stream {} existing \ + debug_assert!(false, "H3 connection {:?} stream {} existing \ session is not allowed", conn.connection_id, stream_id) }; @@ -318,22 +345,66 @@ impl HttpSession { } } Err(h3::Error::Done) => { - debug!("H3 connection {} no events available", conn.connection_id); + debug!("H3 connection {:?} no events available", conn.connection_id); // TODO: in case PriorityUpdate was triggered call take_priority_update() here let timeout; + let is_closed; { let qconn = conn.quic_connection.lock(); - if qconn.is_closed() || - !(qconn.is_established() || qconn.is_in_early_data()) { - if conn.sessions.len() > 0 { - warn!("h3 connection {} closed with open {} sessions", - conn.connection_id, conn.sessions.len()); - } - return Ok(None) - } + is_closed = qconn.is_closed() || + !(qconn.is_established() || qconn.is_in_early_data()); timeout = qconn.timeout(); } + // housekeeping finished streams/requests + while !conn.drop_sessions.1.is_empty() { + if let Some(stream_id) = conn.drop_sessions.1.recv().await { + match conn.sessions.remove(&stream_id) { + None => { + debug_assert!(false, "failed to remove stream from sessions"); + warn!("failed to remove stream from sessions") + } + Some(_) => { + debug!("connection {:?} stream {} removed from sessions", + conn.connection_id, stream_id); 
+ conn.requests_handled += 1; + } + }; + } + } + + // session was closed by remote + if is_closed { + if conn.sessions.len() > 0 { + warn!("H3 connection {:?} closed with open {} sessions", + conn.connection_id, conn.sessions.len()); + error!("H3 connection open sessions {:?}", conn.sessions); + } + error!("h3 connection {:?} closed", conn.connection_id); + conn.tx_flushed.notified().await; + return Ok(None) + } + + // closing session + if conn.sessions.is_empty() && conn.requests_handled > 0 { + debug!("connection {:?} closing as no (more) outstanding requests", conn.connection_id); + let res; + { + let mut qconn = conn.quic_connection.lock(); + res = qconn.close(true, 0x100, b"okthxbye"); + } + match res { + Ok(()) | Err(quiche::Error::Done) => { + conn.tx_notify.notify_waiters(); + // ensure data is flushed before dropping the connection + conn.tx_flushed.notified().await; + return Ok(None) + }, + Err(e) => Err(e).explain_err( + ErrorType::H3Error, |_| "failed to close quic connection")?, + } + } + debug!("Quic connection {:?} is still active. Timeout: {:?}", conn.connection_id, timeout); if let Some(timeout) = timeout { // race for new data on connection or timeout @@ -346,7 +417,7 @@ impl HttpSession { } }; - debug!("H3 connection {} waiting for data", conn.connection_id); + debug!("H3 connection {:?} waiting for data", conn.connection_id); continue 'poll; } Err(e) => { @@ -354,19 +425,13 @@ impl HttpSession { // the appropriate error code, using the transport’s close() method. 
// send the close() event - conn.tx_notify.notify_one(); + conn.tx_notify.notify_waiters(); error!("H3 connection closed with error {:?}.", e); return Err(e).explain_err( ErrorType::H3Error, |_| "while accepting new downstream requests") } } - - while !conn.drop_sessions.1.is_empty() { - if let Some(stream_id) = conn.drop_sessions.1.recv().await { - conn.sessions.remove(&stream_id); - } - } } } @@ -422,8 +487,8 @@ impl HttpSession { let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); debug!( - "H3 connection {} stream {} receiving body", - qconn.trace_id(), + "H3 connection {:?} stream {} receiving body", + self.connection_id, self.stream_id ); hconn.recv_body(&mut qconn, self.stream_id, out) @@ -484,15 +549,15 @@ impl HttpSession { self.stream_capacity(header_size(headers)).await .explain_err( ErrorType::WriteError, - |_| format!("H3 connection {} failed to acquire capacity for stream {}", + |_| format!("H3 connection {:?} failed to acquire capacity for stream {}", self.connection_id, self.stream_id))?; let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); debug!( - "H3 connection {} stream {} sending response headers={:?}, finished={}", - qconn.trace_id(), + "H3 connection {:?} stream {} sending response headers={:?}, finished={}", + self.connection_id, self.stream_id, headers, fin @@ -500,7 +565,7 @@ impl HttpSession { match hconn.send_response(&mut qconn, self.stream_id, headers, fin) { Ok(()) => { - self.tx_notify.notify_one(); + self.tx_notify.notify_waiters(); Ok(()) } Err(h3::Error::Done) => { Ok(()) }, @@ -541,7 +606,7 @@ impl HttpSession { match self.send_body(send, end).await { Ok(sent_size) => { sent_len += sent_size; - self.tx_notify.notify_one(); + self.tx_notify.notify_waiters(); }, Err(e) => return Err(e).explain_err( ErrorType::WriteError, |_| "writing h3 response body to downstream") @@ -557,7 +622,7 @@ impl HttpSession { let mut qconn = self.quic_connection.lock(); let mut 
hconn = self.h3_connection.lock(); - debug!("H3 connection {} stream {} sending response body with length={:?}, finished={}", + debug!("H3 connection {:?} stream {} sending response body with length={:?}, finished={}", self.connection_id, self.stream_id, body.len(), fin); hconn.send_body(&mut qconn, self.stream_id, body, fin) @@ -607,15 +672,15 @@ impl HttpSession { self.stream_capacity(header_size(headers)).await .explain_err( ErrorType::WriteError, - |_| format!("H3 connection {} failed to acquire capacity for stream {}", + |_| format!("H3 connection {:?} failed to acquire capacity for stream {}", self.connection_id, self.stream_id))?; let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); debug!( - "H3 connection {} stream {} sending additional headers={:?}, is_trailer={:?} finished={}", - qconn.trace_id(), + "H3 connection {:?} stream {} sending additional headers={:?}, is_trailer={:?} finished={}", + self.connection_id, self.stream_id, headers, is_trailer, @@ -624,7 +689,7 @@ impl HttpSession { match hconn.send_additional_headers(&mut qconn, stream_id, headers, is_trailer, fin) { Ok(()) => { - self.tx_notify.notify_one(); + self.tx_notify.notify_waiters(); Ok(()) } Err(e) => Err(e).explain_err( @@ -671,9 +736,9 @@ impl HttpSession { loop { match self.event_rx.recv().await { Some(ev) => { - trace!("event {:?}", ev); match ev { Event::Finished => { + trace!("stream {} event {:?}", self.stream_id, ev); self.read_ended = true; return Ok(()) } @@ -681,6 +746,7 @@ impl HttpSession { debug_assert!(false, "Headers or Finished event when Data requested"); }, Event::Data => { + trace!("stream {} event {:?}", self.stream_id, ev); return Ok(()) } Event::Reset(error_code) => { @@ -721,7 +787,7 @@ impl HttpSession { loop { match self.event_rx.recv().await { Some(ev) => { - trace!("event {:?}", ev); + error!("reset stream {} event {:?}", self.stream_id, ev); match ev { Event::Data | Event::Finished | @@ -826,7 +892,7 @@ impl HttpSession { fn 
stream_shutdown(&self, direction: Shutdown, error_code: u64) { let mut qconn = self.quic_connection.lock(); match qconn.stream_shutdown(self.stream_id, direction, error_code) { - Ok(()) => self.tx_notify.notify_one(), + Ok(()) => self.tx_notify.notify_waiters(), Err(e) => warn!("h3 stream {} shutdown failed. {:?}", self.stream_id, e) } } diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index 99e546a9c..a3002bac5 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -15,7 +15,7 @@ use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; use tokio::net::UdpSocket; use tokio::sync::mpsc::{channel, Receiver, Sender}; use tokio::sync::mpsc::error::TryRecvError; -use tokio::sync::Notify; +use tokio::sync::{mpsc, Notify}; use pingora_error::{BError, Error, ErrorType, Result}; use quiche::Connection as QuicheConnection; use tokio::task::JoinHandle; @@ -44,6 +44,7 @@ pub const MAX_IPV6_UDP_PACKET_SIZE: usize = 1452; pub const MAX_IPV6_QUIC_DATAGRAM_SIZE: usize = 1350; const HANDSHAKE_PACKET_BUFFER_SIZE: usize = 64; +const CONNECTION_DROP_CHANNEL_SIZE : usize = 1024; pub struct Listener { socket: Arc, @@ -53,6 +54,7 @@ pub struct Listener { crypto: Crypto, connections: Mutex, ConnectionHandle>>, + drop_connections: (Sender>, Mutex>>) } pub struct Crypto { @@ -67,6 +69,7 @@ pub enum Connection { pub struct IncomingState { connection_id: ConnectionId<'static>, config: Arc>, + drop_connection: Sender>, socket: Arc, socket_details: SocketDetails, @@ -90,10 +93,12 @@ pub struct EstablishedState { socket: Arc, tx_handle: JoinHandle>, - connection_id: String, + pub(crate) connection_id: ConnectionId<'static>, pub connection: Arc>, - pub tx_notify: Arc, + pub drop_connection: Sender>, pub rx_notify: Arc, + pub tx_notify: Arc, + pub tx_flushed: Arc, } pub enum ConnectionHandle { @@ -125,6 +130,7 @@ pub(crate) enum HandshakeResponse { #[derive(Clone)] pub struct EstablishedHandle { + 
connection_id: ConnectionId<'static>, connection: Arc>, rx_notify: Arc, tx_notify: Arc, @@ -164,6 +170,7 @@ impl TryFrom for Listener { }, }; + let drop_connections = mpsc::channel(CONNECTION_DROP_CHANNEL_SIZE); Ok(Listener { socket: Arc::new(io), socket_details: SocketDetails { @@ -178,6 +185,7 @@ impl TryFrom for Listener { }, connections: Default::default(), + drop_connections: (drop_connections.0, Mutex::new(drop_connections.1)) }) } } @@ -191,6 +199,29 @@ impl Listener { // receive from network and parse Quic header let (size, from) = self.socket.recv_from(&mut rx_buf).await?; + // cleanup connections + { + let mut drop_conn = self.drop_connections.1.lock(); + let mut conn = self.connections.lock(); + 'housekeep: loop { + match drop_conn.try_recv() { + Ok(drop_id) => { + match conn.remove(&drop_id) { + None => error!("failed to remove connection handle {:?}", drop_id), + Some(_) => debug!("removed connection handle {:?} from connections", drop_id) + } + } + Err(e) => match e { + TryRecvError::Empty => break 'housekeep, + TryRecvError::Disconnected => { + debug_assert!(false, "drop connections receiver disconnected"); + break 'housekeep + } + } + }; + } + } + // parse the Quic packet's header let header = match Header::from_slice(rx_buf[..size].as_mut(), quiche::MAX_CONN_ID_LEN) { Ok(hdr) => hdr, @@ -200,7 +231,6 @@ impl Listener { continue 'read; } }; - trace!("dgram received from={} length={}", from, size); // TODO: allow for connection id updates during lifetime // connection needs to be able to update source_ids() or destination_ids() @@ -221,18 +251,32 @@ impl Listener { conn_id = Self::gen_cid(&self.crypto.key, &header); handle = connections.get_mut(&conn_id); }; + + trace!("connection {:?} dgram received from={} length={}", conn_id, from, size); + if let Some(handle) = handle { - trace!("existing connection {:?} {:?}", conn_id, handle); + debug!("existing connection {:?} {:?} {:?}", conn_id, handle, header); match handle { ConnectionHandle::Incoming(i) 
=> { match i.response_rx.try_recv() { Ok(msg) => { match msg { HandshakeResponse::Established(e) => { + debug!("received HandshakeResponse::Established"); // receive data into existing connection - Self::recv_connection(e.connection.as_ref(), &mut rx_buf[..size], recv_info)?; - // transition connection - handle.establish(e) + match Self::recv_connection(e.connection.as_ref(), &mut rx_buf[..size], recv_info) { + Ok(_len) => { + e.rx_notify.notify_waiters(); + e.tx_notify.notify_waiters(); + // transition connection + handle.establish(e); + continue 'read; + } + Err(e) => { + // TODO: take action on errors, e.g close connection, send & remove + break 'read Err(e); + } + } } HandshakeResponse::Ignored | HandshakeResponse::Rejected => { @@ -259,7 +303,8 @@ impl Listener { match Self::recv_connection(e.connection.as_ref(), &mut rx_buf[..size], recv_info) { Ok(_len) => { e.rx_notify.notify_waiters(); - e.tx_notify.notify_one(); + e.tx_notify.notify_waiters(); + continue 'read; } Err(e) => { // TODO: take action on errors, e.g close connection, send & remove @@ -293,10 +338,11 @@ impl Listener { let (udp_tx, udp_rx) = channel::(HANDSHAKE_PACKET_BUFFER_SIZE); let (response_tx, response_rx) = channel::(1); - trace!("new incoming connection {:?}", conn_id); + debug!("new incoming connection {:?}", conn_id); let connection = Connection::Incoming(IncomingState { connection_id: conn_id.clone(), config: self.config.clone(), + drop_connection: self.drop_connections.0.clone(), socket: self.socket.clone(), socket_details: self.socket_details.clone(), @@ -362,6 +408,7 @@ impl ConnectionHandle { fn establish(&mut self, handle: EstablishedHandle) { match self { ConnectionHandle::Incoming(_) => { + debug!("connection handle {:?} established", handle.connection_id); let _ = mem::replace(self, ConnectionHandle::Established(handle)); } ConnectionHandle::Established(_) => {} @@ -398,7 +445,7 @@ impl Drop for Connection { Connection::Established(s) => { if !s.tx_handle.is_finished() { 
s.tx_handle.abort(); - trace!("stopped connection tx task"); + error!("stopped connection tx task"); } } } @@ -410,9 +457,10 @@ struct ConnectionTx { socket_details: SocketDetails, connection: Arc>, - connection_id: String, + connection_id: ConnectionId<'static>, tx_notify: Arc, + tx_flushed: Arc, tx_stats: TxBurst, } @@ -422,7 +470,7 @@ impl ConnectionTx { let mut out = [0u8;MAX_IPV6_BUF_SIZE]; let mut finished_sending = false; - debug!("connection tx write"); + debug!("connection {:?} tx write", id); 'write: loop { // update stats from connection let max_send_burst = { @@ -433,7 +481,7 @@ impl ConnectionTx { let mut dst_info = None; // fill tx buffer with connection data - trace!("total_write={}, max_send_burst={}", total_write, max_send_burst); + trace!("connection {:?} total_write={}, max_send_burst={}", id, total_write, max_send_burst); 'fill: while total_write < max_send_burst { let send = { let mut conn = self.connection.lock(); @@ -442,16 +490,16 @@ impl ConnectionTx { let (size, send_info) = match send { Ok((size, info)) => { - debug!("connection sent to={:?}, length={}", info.to, size); + debug!("connection {:?} sent to={:?}, length={}", id, info.to, size); (size, info) }, Err(e) => { if e == quiche::Error::Done { - trace!("connection send finished"); + trace!("connection {:?} send finished", id); finished_sending = true; break 'fill; } - error!("connection send error: {:?}", e); + error!("connection {:?} send error: {:?}", id, e); /* TODO: close connection let mut conn = self.connection.lock(); conn.close(false, 0x1, b"fail").ok(); @@ -468,7 +516,7 @@ impl ConnectionTx { } if total_write == 0 || dst_info.is_none() { - debug!("nothing to send, waiting for notification..."); + debug!("connection {:?} nothing to send, waiting for notification...", id); self.tx_notify.notified().await; continue; } @@ -484,17 +532,18 @@ impl ConnectionTx { self.socket_details.gso_enabled, ).await { if e.kind() == io::ErrorKind::WouldBlock { - error!("network socket would 
block"); + error!("connection {:?} network socket would block", id); continue } break 'write Err(Error::explain( ErrorType::WriteError, - format!("network send failed with {:?}", e))); + format!("connection {:?} network send failed with {:?}", id, e))); } - trace!("network sent to={} bytes={}", dst_info.to, total_write); + trace!("connection {:?} network sent to={} bytes={}", id, dst_info.to, total_write); if finished_sending { - debug!("sending finished, waiting for notification..."); + // used during connection shutdown + self.tx_flushed.notify_waiters(); self.tx_notify.notified().await } } diff --git a/pingora-core/src/protocols/l4/quic/settings.rs b/pingora-core/src/protocols/l4/quic/settings.rs index b51091d89..c0c288ccd 100644 --- a/pingora-core/src/protocols/l4/quic/settings.rs +++ b/pingora-core/src/protocols/l4/quic/settings.rs @@ -33,7 +33,7 @@ impl Settings { // config.log_keys() && config.set_keylog(); // logging SSL secrets // config.set_ticket_key() // session ticket signer key material - config.enable_early_data(); + //config.enable_early_data(); // can lead to ZeroRTT headers during handshake config .set_application_protos(quiche::h3::APPLICATION_PROTOCOL) diff --git a/pingora-core/src/protocols/l4/quic/tls_handshake.rs b/pingora-core/src/protocols/l4/quic/tls_handshake.rs index da2539ab0..220c20aff 100644 --- a/pingora-core/src/protocols/l4/quic/tls_handshake.rs +++ b/pingora-core/src/protocols/l4/quic/tls_handshake.rs @@ -2,6 +2,7 @@ use std::net::SocketAddr; use std::sync::Arc; use log::{debug, error, trace, warn}; use parking_lot::Mutex; +use quiche::ConnectionId; use tokio::net::UdpSocket; use tokio::sync::Notify; use pingora_error::{Error, ErrorType, OrErr}; @@ -35,6 +36,7 @@ pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result pingora_error::Result pingora_error::Result pingora_error::Result pingora_error::Result pingora_error::Result (size, info), @@ -187,15 +190,15 @@ async fn handshake_inner(state: &mut 
IncomingState) -> pingora_error::Result pingora_error::Result {} - Some(e) => { - error!("{}", String::from_utf8_lossy(e.reason.as_slice()).to_string()) - } + trace!("connection {:?} peer_error={:?}, local_error={:?}", conn_id, conn.peer_error(), conn.local_error()); + if let Some(e) = conn.peer_error() { + error!("connection {:?} peer error reason: {}", conn_id, String::from_utf8_lossy(e.reason.as_slice()).to_string()); } - match conn.local_error() { - None => {} - Some(e) => { - error!("{}", String::from_utf8_lossy(e.reason.as_slice()).to_string()) - } + if let Some(e) = conn.local_error() { + error!("connection {:?} local error reason: {}", conn_id, String::from_utf8_lossy(e.reason.as_slice()).to_string()); } } // stop accepting packets on mpsc channel @@ -234,8 +231,9 @@ async fn handshake_inner(state: &mut IncomingState) -> pingora_error::Result pingora_error::Result pingora_error::Result pingora_error::Result, buf: &[u8], to: SocketAddr) -> pingora_error::Result { +async fn send_dgram(id: &ConnectionId<'_>, io: &Arc, buf: &[u8], to: SocketAddr) -> pingora_error::Result { match io.send_to(buf, &to).await { Ok(sent) => { debug_assert_eq!(sent, buf.len(), "amount of network sent data does not correspond to packet size"); - trace!("sent dgram to={:?} length={:?} ", to, buf.len()); + trace!("connection {:?} sent dgram to={:?} length={:?} ", id, to, buf.len()); Ok(sent) } Err(e) => { From de6dcc0960c190fe77fb03f4018d739a38357a52 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Wed, 8 Jan 2025 11:56:34 +0100 Subject: [PATCH 12/52] enhance TLS handshake robustness & timeouts --- pingora-core/src/protocols/http/v3/server.rs | 124 ++++++++++------- pingora-core/src/protocols/l4/quic/mod.rs | 126 ++++++++++-------- .../src/protocols/l4/quic/tls_handshake.rs | 63 +++++---- pingora-core/tests/test_basic.rs | 2 +- 4 files changed, 185 insertions(+), 130 deletions(-) diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs 
index 49a27d9bc..fe82b81e1 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -22,7 +22,7 @@ use http::uri::PathAndQuery; use http::{header, HeaderMap, HeaderName}; use pingora_error::{Error, ErrorType, OrErr, Result}; use std::sync::{Arc, OnceLock}; -use std::time::Duration; +use std::time::{Duration, Instant}; use log::{debug, error, info, trace, warn}; use parking_lot::Mutex; use crate::protocols::http::v1::client::http_req_header_to_wire; @@ -47,6 +47,7 @@ const H3_SESSION_EVENTS_CHANNEL_SIZE : usize = 256; const H3_SESSION_DROP_CHANNEL_SIZE : usize = 1024; const BODY_BUF_LIMIT: usize = 1024 * 64; const SHUTDOWN_GOAWAY_DRAIN_TIMEOUT: Duration = Duration::from_secs(60); +const DEFAULT_CONNECTION_IDLE_TIMEOUT: Duration = Duration::from_millis(25); /// Perform HTTP/3 connection handshake with an established (QUIC) connection. /// @@ -97,7 +98,6 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

, - requests_handled: u64 } impl Drop for H3Connection { @@ -295,6 +294,13 @@ impl HttpSession { Event::GoAway => { info!("stream_id {} received GoAway", stream_id); conn.received_goaway = Some(stream_id); + + let mut qconn = conn.quic_connection.lock(); + let mut hconn = conn.h3_connection.lock(); + hconn.send_goaway(&mut qconn, conn.max_accepted_stream_id) + .explain_err( + ErrorType::InternalError, |_| "failed to send goaway")?; + conn.tx_notify.notify_waiters(); }, Event::Headers { list, more_frames } => { trace!( @@ -347,16 +353,8 @@ impl HttpSession { Err(h3::Error::Done) => { debug!("H3 connection {:?} no events available", conn.connection_id); // TODO: in case PriorityUpdate was triggered call take_priority_update() here - let timeout; - let is_closed; - { - let qconn = conn.quic_connection.lock(); - is_closed = qconn.is_closed() || - !(qconn.is_established() || qconn.is_in_early_data()); - timeout = qconn.timeout(); - } - // housekeeping finished streams/requests + // housekeeping finished sessions while !conn.drop_sessions.1.is_empty() { if let Some(stream_id) = conn.drop_sessions.1.recv().await { match conn.sessions.remove(&stream_id) { @@ -367,58 +365,87 @@ impl HttpSession { Some(_) => { debug!("connection {:?} stream {} removed from sessions", conn.connection_id, stream_id); - conn.requests_handled += 1; } }; } } - // session was closed by remote + let is_closed; + let timeout; + let timeout_now; + { + let qconn = conn.quic_connection.lock(); + is_closed = qconn.is_closed() || + !(qconn.is_established() || qconn.is_in_early_data()); + if is_closed { + if let Some(e) = qconn.peer_error() { + debug!("connection {:?} peer error reason: {}", conn.connection_id, + String::from_utf8_lossy(e.reason.as_slice()).to_string()); + } + if let Some(e) = qconn.local_error() { + debug!("connection {:?} local error reason: {}", conn.connection_id, + String::from_utf8_lossy(e.reason.as_slice()).to_string()); + } + } + timeout = qconn.timeout_instant(); + timeout_now = 
Instant::now(); + } + if is_closed { if conn.sessions.len() > 0 { warn!("H3 connection {:?} closed with open {} sessions", conn.connection_id, conn.sessions.len()); - error!("H3 connection open sessions {:?}", conn.sessions); + } else { + debug!("H3 connection {:?} closed", conn.connection_id); } - error!("h3 connection {:?} closed", conn.connection_id); + + conn.tx_notify.notify_waiters(); conn.tx_flushed.notified().await; return Ok(None) } - // closing session - if conn.sessions.is_empty() && conn.requests_handled > 0 { - debug!("connection {:?} closing as no (more) outstanding requests", conn.connection_id); - let res; - { - let mut qconn = conn.quic_connection.lock(); - res = qconn.close(true, 0x100, b"okthxbye"); - } - match res { - Ok(()) | Err(quiche::Error::Done) => { - conn.tx_notify.notify_waiters(); - // ensure data is flushed before dropping the connection - conn.tx_flushed.notified().await; - return Ok(None) - }, - Err(e) => Err(e).explain_err( - ErrorType::H3Error, |_| "failed to close quic connection")?, - } - } - - debug!("Quic connection {:?} is still active. Timeout: {:?}", conn.connection_id, timeout); - if let Some(timeout) = timeout { - // race for new data on connection or timeout - tokio::select! { - _timeout = tokio::time::sleep(timeout) => { - let mut qconn = conn.quic_connection.lock(); - qconn.on_timeout(); + // race for new data on connection or timeout + tokio::select! 
{ + _data = conn.rx_notify.notified() => {} + used_timeout_duration = async { + // quiche timeout instants are on the initial calls very short + // quiche timeout durations are sometimes 0ns, None would be expected + // this can lead to premature closing of the connection + // guarding with DEFAULT_CONNECTION_IDLE_TIMEOUT + if timeout.is_none() { + trace!("connection {:?} default timeout {:?}", conn.connection_id, DEFAULT_CONNECTION_IDLE_TIMEOUT); + tokio::time::sleep(DEFAULT_CONNECTION_IDLE_TIMEOUT.into()).await; + DEFAULT_CONNECTION_IDLE_TIMEOUT + } else { + if timeout < Instant::now().checked_add(DEFAULT_CONNECTION_IDLE_TIMEOUT) { + trace!("connection {:?} default timeout {:?}", conn.connection_id, DEFAULT_CONNECTION_IDLE_TIMEOUT); + tokio::time::sleep(DEFAULT_CONNECTION_IDLE_TIMEOUT.into()).await; + DEFAULT_CONNECTION_IDLE_TIMEOUT + } else { + let timeout = timeout.unwrap(); + let timeout_duration = timeout.duration_since(timeout_now); + tokio::time::sleep(timeout_duration.into()).await; + trace!("connection {:?} timeout {:?}", conn.connection_id, timeout_duration); + timeout_duration + } + } + } => { + if conn.sessions.len() > 0 { + warn!("connection {:?} timeout {:?} reached with {} open sessions", + conn.connection_id, used_timeout_duration, conn.sessions.len()); + } else { + { + let mut qconn = conn.quic_connection.lock(); + qconn.on_timeout(); + } + debug!("connection {:?} timed out {:?}", conn.connection_id, timeout); } - _data = conn.rx_notify.notified() => {} - } - }; - debug!("H3 connection {:?} waiting for data", conn.connection_id); - continue 'poll; + conn.tx_notify.notify_waiters(); + conn.tx_flushed.notified().await; + return Ok(None) + } + } } Err(e) => { // If an error occurs while processing data, the connection is closed with @@ -725,6 +752,7 @@ impl HttpSession { ErrorType::WriteError, |e| format! {"Writing h3 response body to downstream failed. 
{e}"}, )?; + self.tx_notify.notify_waiters(); self.send_ended = true; } // else: the response header is not sent, do nothing now. diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index a3002bac5..0c5e88141 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -16,7 +16,7 @@ use tokio::net::UdpSocket; use tokio::sync::mpsc::{channel, Receiver, Sender}; use tokio::sync::mpsc::error::TryRecvError; use tokio::sync::{mpsc, Notify}; -use pingora_error::{BError, Error, ErrorType, Result}; +use pingora_error::{BError, Error, ErrorType, OrErr, Result}; use quiche::Connection as QuicheConnection; use tokio::task::JoinHandle; use settings::Settings as QuicSettings; @@ -74,7 +74,7 @@ pub struct IncomingState { socket: Arc, socket_details: SocketDetails, udp_rx: Receiver, - response_tx: Sender, + response: Arc>>, dgram: UdpRecv, @@ -118,7 +118,7 @@ impl Debug for ConnectionHandle { pub struct IncomingHandle { udp_tx: Sender, - response_rx: Receiver, + response: Arc>>, } pub(crate) enum HandshakeResponse { @@ -242,6 +242,7 @@ impl Listener { let mut conn_id = header.dcid.clone(); let mut udp_tx = None; + { let mut connections = self.connections.lock(); // send to corresponding connection @@ -252,55 +253,37 @@ impl Listener { handle = connections.get_mut(&conn_id); }; - trace!("connection {:?} dgram received from={} length={}", conn_id, from, size); + trace!("connection {:?} network received from={} length={}", conn_id, from, size); if let Some(handle) = handle { debug!("existing connection {:?} {:?} {:?}", conn_id, handle, header); + let mut established_handle = None; match handle { ConnectionHandle::Incoming(i) => { - match i.response_rx.try_recv() { - Ok(msg) => { - match msg { - HandshakeResponse::Established(e) => { - debug!("received HandshakeResponse::Established"); - // receive data into existing connection - match Self::recv_connection(e.connection.as_ref(), &mut 
rx_buf[..size], recv_info) { - Ok(_len) => { - e.rx_notify.notify_waiters(); - e.tx_notify.notify_waiters(); - // transition connection - handle.establish(e); - continue 'read; - } - Err(e) => { - // TODO: take action on errors, e.g close connection, send & remove - break 'read Err(e); - } - } - } - HandshakeResponse::Ignored - | HandshakeResponse::Rejected => { - connections.remove(&header.dcid); - continue 'read - } + let resp; + { + resp = i.response.lock().take(); + } + if let Some(resp) = resp { + match resp { + HandshakeResponse::Established(e) => { + debug!("connection {:?} received HandshakeResponse::Established", conn_id); + // receive data into existing connection + established_handle = Some(e); + } + HandshakeResponse::Ignored + | HandshakeResponse::Rejected => { + connections.remove(&header.dcid); + continue 'read } } - Err(e) => { - match e { - TryRecvError::Empty => { - udp_tx = Some(i.udp_tx.clone()); - } - TryRecvError::Disconnected => { - warn!("dropping connection {:?} handshake response channel receiver disconnected.", &header.dcid); - connections.remove(&header.dcid); - } - }; - } + } else { + udp_tx = Some(i.udp_tx.clone()); } } ConnectionHandle::Established(e) => { // receive data into existing connection - match Self::recv_connection(e.connection.as_ref(), &mut rx_buf[..size], recv_info) { + match Self::recv_connection(&conn_id, e.connection.as_ref(), &mut rx_buf[..size], recv_info) { Ok(_len) => { e.rx_notify.notify_waiters(); e.tx_notify.notify_waiters(); @@ -313,6 +296,21 @@ impl Listener { } } } + if let Some(e) = established_handle { + match Self::recv_connection(&conn_id, e.connection.as_ref(), &mut rx_buf[..size], recv_info) { + Ok(_len) => { + e.rx_notify.notify_waiters(); + e.tx_notify.notify_waiters(); + // transition connection + handle.establish(e); + continue 'read; + } + Err(e) => { + // TODO: take action on errors, e.g close connection, send & remove + break 'read Err(e); + } + } + } } }; if let Some(udp_tx) = udp_tx { @@ 
-336,7 +334,7 @@ impl Listener { // create incoming connection & handle let (udp_tx, udp_rx) = channel::(HANDSHAKE_PACKET_BUFFER_SIZE); - let (response_tx, response_rx) = channel::(1); + let response = Arc::new(Mutex::new(None)); debug!("new incoming connection {:?}", conn_id); let connection = Connection::Incoming(IncomingState { @@ -347,7 +345,7 @@ impl Listener { socket: self.socket.clone(), socket_details: self.socket_details.clone(), udp_rx, - response_tx, + response: response.clone(), dgram: UdpRecv { pkt: rx_buf[..size].to_vec(), @@ -360,7 +358,7 @@ impl Listener { }); let handle = ConnectionHandle::Incoming(IncomingHandle { udp_tx, - response_rx, + response, }); { @@ -372,17 +370,17 @@ impl Listener { } } - fn recv_connection(conn: &Mutex, mut rx_buf: &mut [u8], recv_info: RecvInfo) -> io::Result { + fn recv_connection(conn_id: &ConnectionId<'_>, conn: &Mutex, mut rx_buf: &mut [u8], recv_info: RecvInfo) -> io::Result { let size = rx_buf.len(); let mut conn = conn.lock(); match conn.recv(&mut rx_buf, recv_info) { Ok(len) => { - debug!("connection received: length={}", len); + debug!("connection {:?} received data length={}", conn_id, len); debug_assert_eq!(size, len, "size received on connection not equal to len received from network."); Ok(len) } Err(e) => { - error!("connection receive error: {:?}", e); + error!("connection {:?} receive error {:?}", conn_id, e); Err(io::Error::new( io::ErrorKind::BrokenPipe, format!("Connection could not receive network data for {:?}. 
{:?}", @@ -425,9 +423,32 @@ impl Connection { } match self { Connection::Incoming(s) => { + 'drain: loop { + match s.udp_rx.try_recv() { + Ok(mut dgram) => { + let mut conn = state.connection.lock(); + conn.recv(dgram.pkt.as_mut_slice(), dgram.recv_info) + .explain_err( + ErrorType::HandshakeError, |_| "receiving dgram failed")?; + debug!("connection {:?} dgram received while establishing", s.connection_id) + } + Err(e) => { + match e { + TryRecvError::Empty => { + // stop accepting packets + s.udp_rx.close(); + } + TryRecvError::Disconnected => { + // remote already closed channel + } + } + break 'drain; + } + } + } debug_assert!(s.udp_rx.is_empty(), "udp rx channel must be empty when establishing the connection"); - debug!("connection {:?} established", state.connection_id); + error!("connection {:?} established", state.connection_id); let _ = mem::replace(self, Connection::Established(state)); Ok(()) } @@ -445,7 +466,7 @@ impl Drop for Connection { Connection::Established(s) => { if !s.tx_handle.is_finished() { s.tx_handle.abort(); - error!("stopped connection tx task"); + debug!("connection {:?} stopped tx task", s.connection_id); } } } @@ -516,7 +537,8 @@ impl ConnectionTx { } if total_write == 0 || dst_info.is_none() { - debug!("connection {:?} nothing to send, waiting for notification...", id); + trace!("connection {:?} nothing to send", id); + self.tx_flushed.notify_waiters(); self.tx_notify.notified().await; continue; } @@ -542,6 +564,7 @@ impl ConnectionTx { trace!("connection {:?} network sent to={} bytes={}", id, dst_info.to, total_write); if finished_sending { + trace!("connection {:?} finished sending", id); // used during connection shutdown self.tx_flushed.notify_waiters(); self.tx_notify.notified().await @@ -626,8 +649,7 @@ impl AsyncWrite for Connection { } fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - // FIXME: this is called on l4::Stream::drop() - // correlates to the connection, check if stopping tx loop for connection 
& final flush is feasible + // this is called on l4::Stream::drop() Poll::Ready(Ok(())) } diff --git a/pingora-core/src/protocols/l4/quic/tls_handshake.rs b/pingora-core/src/protocols/l4/quic/tls_handshake.rs index 220c20aff..36db0a70b 100644 --- a/pingora-core/src/protocols/l4/quic/tls_handshake.rs +++ b/pingora-core/src/protocols/l4/quic/tls_handshake.rs @@ -4,6 +4,7 @@ use log::{debug, error, trace, warn}; use parking_lot::Mutex; use quiche::ConnectionId; use tokio::net::UdpSocket; +use tokio::sync::mpsc::error::TryRecvError; use tokio::sync::Notify; use pingora_error::{Error, ErrorType, OrErr}; use crate::protocols::ConnectionState; @@ -18,14 +19,14 @@ pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result { - if let Some((e_state, e_handle)) = handshake_inner(s).await? { - s.response_tx.send(HandshakeResponse::Established(e_handle)).await - .explain_err(ErrorType::WriteError, - |e| format!("Sending HandshakeResponse failed with {}", e))?; + Connection::Incoming(i) => { + if let Some(e_state) = handshake_inner(i).await? 
{ + // send HANDSHAKE_DONE Quic frame on established connection + e_state.tx_notify.notify_waiters(); + e_state.tx_flushed.notified().await; Some(e_state) } else { - debug!("handshake either rejected or ignored for connection {:?}", s.connection_id); + debug!("handshake either rejected or ignored for connection {:?}", i.connection_id); None } } @@ -36,7 +37,6 @@ pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result pingora_error::Result pingora_error::Result> { +async fn handshake_inner(state: &mut IncomingState) -> pingora_error::Result> { let IncomingState { connection_id: conn_id, config, @@ -55,21 +55,23 @@ async fn handshake_inner(state: &mut IncomingState) -> pingora_error::Result pingora_error::Result pingora_error::Result Date: Thu, 9 Jan 2025 15:57:28 +0100 Subject: [PATCH 13/52] fix send issue/packet creation --- pingora-core/src/protocols/http/v3/server.rs | 32 ++++++++---- pingora-core/src/protocols/l4/quic/mod.rs | 49 +++++++++++++------ .../src/protocols/l4/quic/tls_handshake.rs | 7 +-- 3 files changed, 58 insertions(+), 30 deletions(-) diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index fe82b81e1..ec90e3e50 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -47,7 +47,7 @@ const H3_SESSION_EVENTS_CHANNEL_SIZE : usize = 256; const H3_SESSION_DROP_CHANNEL_SIZE : usize = 1024; const BODY_BUF_LIMIT: usize = 1024 * 64; const SHUTDOWN_GOAWAY_DRAIN_TIMEOUT: Duration = Duration::from_secs(60); -const DEFAULT_CONNECTION_IDLE_TIMEOUT: Duration = Duration::from_millis(25); +const DEFAULT_CONNECTION_IDLE_TIMEOUT: Duration = Duration::from_millis(1000); /// Perform HTTP/3 connection handshake with an established (QUIC) connection. /// @@ -75,7 +75,7 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

) -> Result

, + tx_flushed: Arc, // receive notification on Quic recv, used to check stream capacity // as it only increases after MaxData or MaxStreamData frame was received rx_notify: Arc, @@ -323,6 +326,7 @@ impl HttpSession { drop_session: conn.drop_sessions.0.clone(), tx_notify: conn.tx_notify.clone(), + tx_flushed: conn.tx_flushed.clone(), rx_notify: conn.rx_notify.clone(), event_rx, @@ -400,7 +404,7 @@ impl HttpSession { } conn.tx_notify.notify_waiters(); - conn.tx_flushed.notified().await; + //conn.tx_flushed.notified().await; return Ok(None) } @@ -408,6 +412,7 @@ impl HttpSession { tokio::select! { _data = conn.rx_notify.notified() => {} used_timeout_duration = async { + // FIXME: check if this is still an issue // quiche timeout instants are on the initial calls very short // quiche timeout durations are sometimes 0ns, None would be expected // this can lead to premature closing of the connection @@ -442,7 +447,7 @@ impl HttpSession { } conn.tx_notify.notify_waiters(); - conn.tx_flushed.notified().await; + //conn.tx_flushed.notified().await; return Ok(None) } } @@ -616,22 +621,25 @@ impl HttpSession { }; let mut sent_len = 0; + let mut fin = end; while sent_len < data.len() { - let required = cmp::min(data.len(), MAX_IPV6_QUIC_DATAGRAM_SIZE); + let required = cmp::min(data.len() - sent_len, MAX_IPV6_QUIC_DATAGRAM_SIZE); let capacity = self.stream_capacity(required).await .explain_err( ErrorType::WriteError, |e| format!("Failed to acquire capacity on stream id {} with {}", self.stream_id, e))?; let send; - if capacity > data.len() { + if capacity > data.len() - sent_len { send = &data[sent_len..data.len()]; } else { - send = &data[sent_len..capacity]; + send = &data[sent_len..sent_len + capacity]; } - match self.send_body(send, end).await { + fin = sent_len + send.len() == data.len() && end; + match self.send_body(send, fin) { Ok(sent_size) => { + debug_assert_eq!(sent_size, send.len()); sent_len += sent_size; self.tx_notify.notify_waiters(); }, @@ -639,13 +647,15 @@ 
impl HttpSession { ErrorType::WriteError, |_| "writing h3 response body to downstream") } } + debug_assert_eq!(fin, end); + debug_assert_eq!(sent_len, data.len()); self.body_sent += sent_len; self.send_ended = self.send_ended || end; Ok(()) } - async fn send_body(&self, body: &[u8], fin: bool) -> h3::Result { + fn send_body(&self, body: &[u8], fin: bool) -> h3::Result { let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); @@ -665,6 +675,8 @@ impl HttpSession { if capacity >= required { Ok(capacity) } else { + self.tx_notify.notify_waiters(); + //self.tx_flushed.notified().await; self.rx_notify.notified().await; Box::pin(self.stream_capacity(required)).await } @@ -747,7 +759,7 @@ impl HttpSession { if self.response_header_written.is_some() { // use an empty data frame to signal the end - self.send_body(&[], true).await + self.send_body(&[], true) .explain_err( ErrorType::WriteError, |e| format! {"Writing h3 response body to downstream failed. {e}"}, diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index 0c5e88141..ad1793130 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use std::{io, mem}; use std::fmt::{Debug, Formatter}; +use std::io::ErrorKind; use std::net::SocketAddr; use std::os::fd::{AsRawFd, RawFd}; use std::pin::Pin; @@ -91,14 +92,12 @@ struct SocketDetails { pub struct EstablishedState { socket: Arc, - tx_handle: JoinHandle>, - pub(crate) connection_id: ConnectionId<'static>, - pub connection: Arc>, - pub drop_connection: Sender>, - pub rx_notify: Arc, - pub tx_notify: Arc, - pub tx_flushed: Arc, + pub(crate) connection: Arc>, + pub(crate) drop_connection: Sender>, + pub(crate) rx_notify: Arc, + pub(crate) tx_notify: Arc, + tx_handle: JoinHandle>, } pub enum ConnectionHandle { @@ -197,7 +196,18 @@ impl Listener { debug!("endpoint rx loop"); 'read: loop { // receive from 
network and parse Quic header - let (size, from) = self.socket.recv_from(&mut rx_buf).await?; + let (size, from) = match self.socket.try_recv_from(&mut rx_buf) { + Ok((size, from)) => (size, from), + Err(e) => { + if e.kind() == ErrorKind::WouldBlock { + // no more UDP packets to read for now, wait for new packets + self.socket.readable().await?; + continue 'read; + } else { + return Err(e) + } + } + }; // cleanup connections { @@ -448,7 +458,7 @@ impl Connection { } debug_assert!(s.udp_rx.is_empty(), "udp rx channel must be empty when establishing the connection"); - error!("connection {:?} established", state.connection_id); + debug!("connection {:?} established", state.connection_id); let _ = mem::replace(self, Connection::Established(state)); Ok(()) } @@ -481,7 +491,6 @@ struct ConnectionTx { connection_id: ConnectionId<'static>, tx_notify: Arc, - tx_flushed: Arc, tx_stats: TxBurst, } @@ -491,6 +500,7 @@ impl ConnectionTx { let mut out = [0u8;MAX_IPV6_BUF_SIZE]; let mut finished_sending = false; + let mut continue_write = false; debug!("connection {:?} tx write", id); 'write: loop { // update stats from connection @@ -534,13 +544,18 @@ impl ConnectionTx { total_write += size; // Use the first packet time to send, not the last. 
let _ = dst_info.get_or_insert(send_info); + + if size < self.tx_stats.max_datagram_size { + continue_write = true; + break 'fill + } } if total_write == 0 || dst_info.is_none() { trace!("connection {:?} nothing to send", id); - self.tx_flushed.notify_waiters(); + //self.tx_flushed.notify_waiters(); self.tx_notify.notified().await; - continue; + continue 'write; } let dst_info = dst_info.unwrap(); @@ -563,11 +578,15 @@ impl ConnectionTx { } trace!("connection {:?} network sent to={} bytes={}", id, dst_info.to, total_write); + if continue_write { + continue 'write; + } + if finished_sending { trace!("connection {:?} finished sending", id); - // used during connection shutdown - self.tx_flushed.notify_waiters(); - self.tx_notify.notified().await + //self.tx_flushed.notify_waiters(); + self.tx_notify.notified().await; + continue 'write; } } } diff --git a/pingora-core/src/protocols/l4/quic/tls_handshake.rs b/pingora-core/src/protocols/l4/quic/tls_handshake.rs index 36db0a70b..546383a56 100644 --- a/pingora-core/src/protocols/l4/quic/tls_handshake.rs +++ b/pingora-core/src/protocols/l4/quic/tls_handshake.rs @@ -4,7 +4,6 @@ use log::{debug, error, trace, warn}; use parking_lot::Mutex; use quiche::ConnectionId; use tokio::net::UdpSocket; -use tokio::sync::mpsc::error::TryRecvError; use tokio::sync::Notify; use pingora_error::{Error, ErrorType, OrErr}; use crate::protocols::ConnectionState; @@ -23,7 +22,7 @@ pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result pingora_error::Result pingora_error::Result pingora_error::Result Date: Mon, 13 Jan 2025 15:58:35 +0100 Subject: [PATCH 14/52] use VecDeque for connection & session drop --- pingora-core/src/protocols/http/v3/server.rs | 89 ++++++++------------ pingora-core/src/protocols/l4/quic/mod.rs | 40 +++------ pingora-core/tests/utils/mod.rs | 4 +- 3 files changed, 54 insertions(+), 79 deletions(-) diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs 
index ec90e3e50..82d29bfb2 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -15,6 +15,7 @@ //! HTTP/3 server session use std::cmp; +use std::collections::VecDeque; use std::fmt::Debug; use crate::protocols::{Digest, SocketAddr, Stream}; use bytes::{BufMut, Bytes, BytesMut}; @@ -36,7 +37,6 @@ use quiche::{h3, Connection as QuicheConnection, ConnectionId, Shutdown}; use quiche::h3::{Connection as QuicheH3Connection, Event, NameValue}; use tokio::sync::{mpsc, Notify}; use tokio::sync::mpsc::{Receiver, Sender}; -use tokio::sync::mpsc::error::TrySendError; use crate::protocols::http::body_buffer::FixedBuffer; use crate::protocols::http::v3::{event_to_request_headers, header_size, headermap_to_headervec, response_headers_to_event}; use crate::protocols::http::v3::nohash::StreamIdHashMap; @@ -44,7 +44,7 @@ use crate::protocols::http::v3::nohash::StreamIdHashMap; static H3_OPTIONS: OnceLock = OnceLock::new(); const H3_SESSION_EVENTS_CHANNEL_SIZE : usize = 256; -const H3_SESSION_DROP_CHANNEL_SIZE : usize = 1024; +const H3_SESSION_DROP_DEQUE_INITIAL_CAPACITY: usize = 2048; const BODY_BUF_LIMIT: usize = 1024 * 64; const SHUTDOWN_GOAWAY_DRAIN_TIMEOUT: Duration = Duration::from_secs(60); const DEFAULT_CONNECTION_IDLE_TIMEOUT: Duration = Duration::from_millis(1000); @@ -62,7 +62,7 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

{ return Err(Error::explain( ErrorType::InternalError, @@ -78,11 +78,10 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

) -> Result

) -> Result

, - drop_quic_connection: Sender>, + drop_quic_connection: Arc>>>, quic_connection: Arc>, h3_connection: Arc>, tx_notify: Arc, - tx_flushed: Arc, rx_notify: Arc, sessions: StreamIdHashMap>, - drop_sessions: (Sender, Receiver), + drop_sessions: Arc>>, max_accepted_stream_id: u64, received_goaway: Option, @@ -124,16 +121,9 @@ pub struct H3Connection { impl Drop for H3Connection { fn drop(&mut self) { - match self.drop_quic_connection.try_send(self.connection_id.clone()) { - Ok(()) => debug!("drop connection {:?}", self.connection_id), - Err(e) => { - let conn_id = match e { - TrySendError::Full(id) => id, - TrySendError::Closed(id) => id - }; - warn!("failed send drop connection {:?} request", conn_id) - } - } + let mut drop_quic_connection = self.drop_quic_connection.lock(); + drop_quic_connection.push_back(self.connection_id.clone()); + debug!("drop connection {:?}", self.connection_id); } } @@ -179,6 +169,24 @@ impl H3Connection { Ok(()) } } + + async fn sessions_housekeeping(&mut self) { + let mut drop_sessions = self.drop_sessions.lock(); + + // housekeeping finished sessions + while let Some(stream_id) = drop_sessions.pop_front() { + match self.sessions.remove(&stream_id) { + None => { + warn!("connection {:?} failed to remove stream {} from sessions", + self.connection_id, stream_id) + } + Some(_) => { + debug!("connection {:?} stream {} removed from sessions", + self.connection_id, stream_id); + } + }; + } + } } /// HTTP/3 server session @@ -189,11 +197,10 @@ pub struct HttpSession { h3_connection: Arc>, // notify during drop to remove event_tx from active sessions - drop_session: Sender, + drop_session: Arc>>, // trigger Quic send, continue ConnectionTx write loop tx_notify: Arc, - tx_flushed: Arc, // receive notification on Quic recv, used to check stream capacity // as it only increases after MaxData or MaxStreamData frame was received rx_notify: Arc, @@ -225,16 +232,9 @@ pub struct HttpSession { impl Drop for HttpSession { fn drop(&mut self) { - match 
self.drop_session.try_send(self.stream_id) { - Ok(()) => debug!("H3 connection {:?} drop stream {}", self.connection_id, self.stream_id), - Err(e) => { - let id = match e { - TrySendError::Full(id) => id, - TrySendError::Closed(id) => id - }; - warn!("H3 connection {:?} stream {} failed notify drop session", self.connection_id, id) - } - } + let mut drop_sessions = self.drop_session.lock(); + drop_sessions.push_back(self.stream_id); + debug!("H3 connection {:?} drop stream {}", self.connection_id, self.stream_id); } } @@ -323,10 +323,9 @@ impl HttpSession { quic_connection: conn.quic_connection.clone(), h3_connection: conn.h3_connection.clone(), - drop_session: conn.drop_sessions.0.clone(), + drop_session: conn.drop_sessions.clone(), tx_notify: conn.tx_notify.clone(), - tx_flushed: conn.tx_flushed.clone(), rx_notify: conn.rx_notify.clone(), event_rx, @@ -358,21 +357,7 @@ impl HttpSession { debug!("H3 connection {:?} no events available", conn.connection_id); // TODO: in case PriorityUpdate was triggered call take_priority_update() here - // housekeeping finished sessions - while !conn.drop_sessions.1.is_empty() { - if let Some(stream_id) = conn.drop_sessions.1.recv().await { - match conn.sessions.remove(&stream_id) { - None => { - debug_assert!(false, "failed to remove stream from sessions"); - warn!("failed to remove stream from sessions") - } - Some(_) => { - debug!("connection {:?} stream {} removed from sessions", - conn.connection_id, stream_id); - } - }; - } - } + conn.sessions_housekeeping().await; let is_closed; let timeout; @@ -435,9 +420,10 @@ impl HttpSession { } } } => { + conn.sessions_housekeeping().await; if conn.sessions.len() > 0 { - warn!("connection {:?} timeout {:?} reached with {} open sessions", - conn.connection_id, used_timeout_duration, conn.sessions.len()); + warn!("connection {:?} timeout {:?} reached with {} open sessions {:?}", + conn.connection_id, used_timeout_duration, conn.sessions.len(), conn.sessions); } else { { let mut qconn = 
conn.quic_connection.lock(); @@ -447,7 +433,6 @@ impl HttpSession { } conn.tx_notify.notify_waiters(); - //conn.tx_flushed.notified().await; return Ok(None) } } diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index ad1793130..b3204b305 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{HashMap, VecDeque}; use std::{io, mem}; use std::fmt::{Debug, Formatter}; use std::io::ErrorKind; @@ -16,7 +16,7 @@ use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; use tokio::net::UdpSocket; use tokio::sync::mpsc::{channel, Receiver, Sender}; use tokio::sync::mpsc::error::TryRecvError; -use tokio::sync::{mpsc, Notify}; +use tokio::sync::Notify; use pingora_error::{BError, Error, ErrorType, OrErr, Result}; use quiche::Connection as QuicheConnection; use tokio::task::JoinHandle; @@ -45,7 +45,7 @@ pub const MAX_IPV6_UDP_PACKET_SIZE: usize = 1452; pub const MAX_IPV6_QUIC_DATAGRAM_SIZE: usize = 1350; const HANDSHAKE_PACKET_BUFFER_SIZE: usize = 64; -const CONNECTION_DROP_CHANNEL_SIZE : usize = 1024; +const CONNECTION_DROP_DEQUE_INITIAL_SIZE: usize = 1024; pub struct Listener { socket: Arc, @@ -55,7 +55,7 @@ pub struct Listener { crypto: Crypto, connections: Mutex, ConnectionHandle>>, - drop_connections: (Sender>, Mutex>>) + drop_connections: Arc>>>, } pub struct Crypto { @@ -70,7 +70,7 @@ pub enum Connection { pub struct IncomingState { connection_id: ConnectionId<'static>, config: Arc>, - drop_connection: Sender>, + drop_connection: Arc>>>, socket: Arc, socket_details: SocketDetails, @@ -94,7 +94,7 @@ pub struct EstablishedState { socket: Arc, pub(crate) connection_id: ConnectionId<'static>, pub(crate) connection: Arc>, - pub(crate) drop_connection: Sender>, + pub(crate) drop_connection: Arc>>>, pub(crate) rx_notify: Arc, pub(crate) tx_notify: Arc, tx_handle: JoinHandle>, @@ -169,7 +169,6 @@ impl TryFrom for 
Listener { }, }; - let drop_connections = mpsc::channel(CONNECTION_DROP_CHANNEL_SIZE); Ok(Listener { socket: Arc::new(io), socket_details: SocketDetails { @@ -184,7 +183,7 @@ impl TryFrom for Listener { }, connections: Default::default(), - drop_connections: (drop_connections.0, Mutex::new(drop_connections.1)) + drop_connections: Arc::new(Mutex::new(VecDeque::with_capacity(CONNECTION_DROP_DEQUE_INITIAL_SIZE))) }) } } @@ -211,24 +210,13 @@ impl Listener { // cleanup connections { - let mut drop_conn = self.drop_connections.1.lock(); + let mut drop_conn = self.drop_connections.lock(); let mut conn = self.connections.lock(); - 'housekeep: loop { - match drop_conn.try_recv() { - Ok(drop_id) => { - match conn.remove(&drop_id) { - None => error!("failed to remove connection handle {:?}", drop_id), - Some(_) => debug!("removed connection handle {:?} from connections", drop_id) - } - } - Err(e) => match e { - TryRecvError::Empty => break 'housekeep, - TryRecvError::Disconnected => { - debug_assert!(false, "drop connections receiver disconnected"); - break 'housekeep - } - } - }; + while let Some(drop_id) = drop_conn.pop_front() { + match conn.remove(&drop_id) { + None => warn!("failed to remove connection handle {:?} from connections", drop_id), + Some(_) => debug!("removed connection handle {:?} from connections", drop_id) + } } } @@ -350,7 +338,7 @@ impl Listener { let connection = Connection::Incoming(IncomingState { connection_id: conn_id.clone(), config: self.config.clone(), - drop_connection: self.drop_connections.0.clone(), + drop_connection: self.drop_connections.clone(), socket: self.socket.clone(), socket_details: self.socket_details.clone(), diff --git a/pingora-core/tests/utils/mod.rs b/pingora-core/tests/utils/mod.rs index 852df3013..cc219269e 100644 --- a/pingora-core/tests/utils/mod.rs +++ b/pingora-core/tests/utils/mod.rs @@ -102,9 +102,11 @@ fn entry_point(opt: Option) { listeners.add_quic("0.0.0.0:6147"); - let echo_service_http = + let mut 
echo_service_http = Service::with_listeners("Echo Service HTTP".to_string(), listeners, EchoApp); + echo_service_http.threads = Some(8); + my_server.add_service(echo_service_http); my_server.run_forever(); } From c3d44d4757af9726cd899e8f45f54fff984d285c Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Tue, 14 Jan 2025 13:07:25 +0100 Subject: [PATCH 15/52] send on end and capacity required --- pingora-core/src/protocols/http/v3/server.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index 82d29bfb2..8308d41ef 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -75,7 +75,6 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

{ - self.tx_notify.notify_waiters(); - Ok(()) - } + Ok(()) => { Ok(()) }, Err(h3::Error::Done) => { Ok(()) }, Err(e) => Err(e).explain_err( ErrorType::WriteError, |_| "H3 connection failed to write response"), @@ -626,7 +624,6 @@ impl HttpSession { Ok(sent_size) => { debug_assert_eq!(sent_size, send.len()); sent_len += sent_size; - self.tx_notify.notify_waiters(); }, Err(e) => return Err(e).explain_err( ErrorType::WriteError, |_| "writing h3 response body to downstream") @@ -634,6 +631,9 @@ impl HttpSession { } debug_assert_eq!(fin, end); debug_assert_eq!(sent_len, data.len()); + if end { + self.tx_notify.notify_waiters(); + } self.body_sent += sent_len; self.send_ended = self.send_ended || end; @@ -661,7 +661,6 @@ impl HttpSession { Ok(capacity) } else { self.tx_notify.notify_waiters(); - //self.tx_flushed.notified().await; self.rx_notify.notified().await; Box::pin(self.stream_capacity(required)).await } From 8f5dd4e6574f1318f9dd175631e25f2d674c6156 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Tue, 14 Jan 2025 13:14:56 +0100 Subject: [PATCH 16/52] remove mutex on listener connections --- pingora-core/src/protocols/l4/listener.rs | 6 +- pingora-core/src/protocols/l4/quic/mod.rs | 108 ++++++++---------- .../src/protocols/l4/quic/tls_handshake.rs | 1 - 3 files changed, 51 insertions(+), 64 deletions(-) diff --git a/pingora-core/src/protocols/l4/listener.rs b/pingora-core/src/protocols/l4/listener.rs index 2bcefe682..1c6b68f84 100644 --- a/pingora-core/src/protocols/l4/listener.rs +++ b/pingora-core/src/protocols/l4/listener.rs @@ -77,10 +77,8 @@ impl AsRawSocket for Listener { impl Listener { /// Accept a connection from the listening endpoint - pub async fn accept(&self) -> io::Result { - // TODO: changing to &mut self would help to simplify connection state locks for Quic - // not required for TCP/UDS, feasible to change to unique (mut) access? 
- match &self { + pub async fn accept(&mut self) -> io::Result { + match self { Self::Quic(l) => { // TODO: update digest when peer_addr changes; // a Quic connection supports IP address switching; diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index b3204b305..6d72c955d 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -54,7 +54,7 @@ pub struct Listener { config: Arc>, crypto: Crypto, - connections: Mutex, ConnectionHandle>>, + connections: HashMap, ConnectionHandle>, drop_connections: Arc>>>, } @@ -189,7 +189,7 @@ impl TryFrom for Listener { } impl Listener { - pub(crate) async fn accept(&self) -> io::Result<(L4Stream, SocketAddr)> { + pub(crate) async fn accept(&mut self) -> io::Result<(L4Stream, SocketAddr)> { let mut rx_buf = [0u8; MAX_IPV6_BUF_SIZE]; debug!("endpoint rx loop"); @@ -211,9 +211,8 @@ impl Listener { // cleanup connections { let mut drop_conn = self.drop_connections.lock(); - let mut conn = self.connections.lock(); while let Some(drop_id) = drop_conn.pop_front() { - match conn.remove(&drop_id) { + match self.connections.remove(&drop_id) { None => warn!("failed to remove connection handle {:?} from connections", drop_id), Some(_) => debug!("removed connection handle {:?} from connections", drop_id) } @@ -241,66 +240,48 @@ impl Listener { let mut conn_id = header.dcid.clone(); let mut udp_tx = None; - { - let mut connections = self.connections.lock(); - // send to corresponding connection - let mut handle; - handle = connections.get_mut(&conn_id); - if handle.is_none() { - conn_id = Self::gen_cid(&self.crypto.key, &header); - handle = connections.get_mut(&conn_id); - }; + // send to corresponding connection + let mut handle; + handle = self.connections.get_mut(&conn_id); + if handle.is_none() { + conn_id = Self::gen_cid(&self.crypto.key, &header); + handle = self.connections.get_mut(&conn_id); + }; - trace!("connection {:?} network received 
from={} length={}", conn_id, from, size); + trace!("connection {:?} network received from={} length={}", conn_id, from, size); - if let Some(handle) = handle { - debug!("existing connection {:?} {:?} {:?}", conn_id, handle, header); - let mut established_handle = None; - match handle { - ConnectionHandle::Incoming(i) => { - let resp; - { - resp = i.response.lock().take(); - } - if let Some(resp) = resp { - match resp { - HandshakeResponse::Established(e) => { - debug!("connection {:?} received HandshakeResponse::Established", conn_id); - // receive data into existing connection - established_handle = Some(e); - } - HandshakeResponse::Ignored - | HandshakeResponse::Rejected => { - connections.remove(&header.dcid); - continue 'read - } - } - } else { - udp_tx = Some(i.udp_tx.clone()); - } + if let Some(handle) = handle { + debug!("existing connection {:?} {:?} {:?}", conn_id, handle, header); + let mut established_handle = None; + match handle { + ConnectionHandle::Incoming(i) => { + let resp; + { + resp = i.response.lock().take(); } - ConnectionHandle::Established(e) => { - // receive data into existing connection - match Self::recv_connection(&conn_id, e.connection.as_ref(), &mut rx_buf[..size], recv_info) { - Ok(_len) => { - e.rx_notify.notify_waiters(); - e.tx_notify.notify_waiters(); - continue 'read; + if let Some(resp) = resp { + match resp { + HandshakeResponse::Established(e) => { + debug!("connection {:?} received HandshakeResponse::Established", conn_id); + // receive data into existing connection + established_handle = Some(e); } - Err(e) => { - // TODO: take action on errors, e.g close connection, send & remove - break 'read Err(e); + HandshakeResponse::Ignored + | HandshakeResponse::Rejected => { + self.connections.remove(&header.dcid); + continue 'read } } + } else { + udp_tx = Some(i.udp_tx.clone()); } } - if let Some(e) = established_handle { + ConnectionHandle::Established(e) => { + // receive data into existing connection match 
Self::recv_connection(&conn_id, e.connection.as_ref(), &mut rx_buf[..size], recv_info) { Ok(_len) => { e.rx_notify.notify_waiters(); e.tx_notify.notify_waiters(); - // transition connection - handle.establish(e); continue 'read; } Err(e) => { @@ -310,6 +291,21 @@ impl Listener { } } } + if let Some(e) = established_handle { + match Self::recv_connection(&conn_id, e.connection.as_ref(), &mut rx_buf[..size], recv_info) { + Ok(_len) => { + e.rx_notify.notify_waiters(); + e.tx_notify.notify_waiters(); + // transition connection + handle.establish(e); + continue 'read; + } + Err(e) => { + // TODO: take action on errors, e.g close connection, send & remove + break 'read Err(e); + } + } + } }; if let Some(udp_tx) = udp_tx { // receive data on UDP channel @@ -359,11 +355,7 @@ impl Listener { response, }); - { - let mut connections = self.connections.lock(); - connections.insert(conn_id, handle); - } - + self.connections.insert(conn_id, handle); return Ok((connection.into(), from)) } } @@ -541,7 +533,6 @@ impl ConnectionTx { if total_write == 0 || dst_info.is_none() { trace!("connection {:?} nothing to send", id); - //self.tx_flushed.notify_waiters(); self.tx_notify.notified().await; continue 'write; } @@ -572,7 +563,6 @@ impl ConnectionTx { if finished_sending { trace!("connection {:?} finished sending", id); - //self.tx_flushed.notify_waiters(); self.tx_notify.notified().await; continue 'write; } diff --git a/pingora-core/src/protocols/l4/quic/tls_handshake.rs b/pingora-core/src/protocols/l4/quic/tls_handshake.rs index 546383a56..a4ee250a9 100644 --- a/pingora-core/src/protocols/l4/quic/tls_handshake.rs +++ b/pingora-core/src/protocols/l4/quic/tls_handshake.rs @@ -22,7 +22,6 @@ pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result Date: Tue, 14 Jan 2025 13:34:46 +0100 Subject: [PATCH 17/52] move tls_handshake to protocols::tls::quic --- pingora-core/src/listeners/mod.rs | 2 +- pingora-core/src/protocols/l4/listener.rs | 11 +- 
.../src/protocols/l4/quic/id_token.rs | 4 +- .../src/protocols/l4/quic/listener.rs | 280 +++++++++++++++ pingora-core/src/protocols/l4/quic/mod.rs | 326 ++---------------- .../src/protocols/l4/quic/tls_handshake.rs | 287 --------------- pingora-core/src/protocols/tls/mod.rs | 1 + pingora-core/src/protocols/tls/quic/mod.rs | 287 +++++++++++++++ pingora-core/tests/utils/mod.rs | 3 +- 9 files changed, 609 insertions(+), 592 deletions(-) create mode 100644 pingora-core/src/protocols/l4/quic/listener.rs create mode 100644 pingora-core/src/protocols/tls/quic/mod.rs diff --git a/pingora-core/src/listeners/mod.rs b/pingora-core/src/listeners/mod.rs index 3b70204b3..fc3e191a1 100644 --- a/pingora-core/src/listeners/mod.rs +++ b/pingora-core/src/listeners/mod.rs @@ -23,7 +23,7 @@ pub mod tls; pub use crate::tls::listeners as tls; use crate::protocols::{tls::TlsRef, ConnectionState, Stream}; -use crate::protocols::l4::quic::tls_handshake::handshake as quic_handshake; +use crate::protocols::tls::quic::handshake as quic_handshake; #[cfg(unix)] use crate::server::ListenFds; diff --git a/pingora-core/src/protocols/l4/listener.rs b/pingora-core/src/protocols/l4/listener.rs index 1c6b68f84..7dc677279 100644 --- a/pingora-core/src/protocols/l4/listener.rs +++ b/pingora-core/src/protocols/l4/listener.rs @@ -55,16 +55,7 @@ impl From for Listener { } } -#[cfg(unix)] -impl AsRawFd for Listener { - fn as_raw_fd(&self) -> std::os::unix::io::RawFd { - match &self { - Self::Quic(l) => l.get_raw_fd(), - Self::Tcp(l) => l.as_raw_fd(), - Self::Unix(l) => l.as_raw_fd(), - } - } -} + #[cfg(windows)] impl AsRawSocket for Listener { diff --git a/pingora-core/src/protocols/l4/quic/id_token.rs b/pingora-core/src/protocols/l4/quic/id_token.rs index 6fec23f68..b4d2a61b9 100644 --- a/pingora-core/src/protocols/l4/quic/id_token.rs +++ b/pingora-core/src/protocols/l4/quic/id_token.rs @@ -34,7 +34,7 @@ use std::net; /// /// Note that this function is only an example and doesn't do any cryptographic /// 
authenticate of the token. *It should not be used in production system*. -pub(super) fn mint_token(hdr: &quiche::Header, src: &net::SocketAddr) -> Vec { +pub(crate) fn mint_token(hdr: &quiche::Header, src: &net::SocketAddr) -> Vec { // TODO: implement token generation/validation using crypto let mut token = Vec::new(); @@ -58,7 +58,7 @@ pub(super) fn mint_token(hdr: &quiche::Header, src: &net::SocketAddr) -> Vec /// /// Note that this function is only an example and doesn't do any cryptographic /// authenticate of the token. *It should not be used in production system*. -pub(super) fn validate_token<'a>( +pub(crate) fn validate_token<'a>( src: &net::SocketAddr, token: &'a [u8], ) -> Option> { // TODO: implement token generation/validation using crypto diff --git a/pingora-core/src/protocols/l4/quic/listener.rs b/pingora-core/src/protocols/l4/quic/listener.rs new file mode 100644 index 000000000..b51d4943f --- /dev/null +++ b/pingora-core/src/protocols/l4/quic/listener.rs @@ -0,0 +1,280 @@ +use std::collections::VecDeque; +use std::io; +use std::io::ErrorKind; +use std::net::SocketAddr; +use std::os::fd::{AsRawFd, RawFd}; +use std::sync::Arc; +use log::{debug, error, trace, warn}; +use parking_lot::Mutex; +use quiche::{ConnectionId, Header, RecvInfo, Type}; +use ring::hmac::Key; +use ring::rand::SystemRandom; +use tokio::net::UdpSocket; +use tokio::sync::mpsc::channel; +use pingora_error::{BError, Error, ErrorType}; +use crate::protocols::l4::quic::{Connection, ConnectionHandle, Crypto, HandshakeResponse, IncomingHandle, IncomingState, Listener, SocketDetails, UdpRecv, CONNECTION_DROP_DEQUE_INITIAL_SIZE, HANDSHAKE_PACKET_BUFFER_SIZE, MAX_IPV6_BUF_SIZE, MAX_IPV6_QUIC_DATAGRAM_SIZE}; +use crate::protocols::l4::quic::sendto::{detect_gso, set_txtime_sockopt}; + +use quiche::Connection as QuicheConnection; + +impl TryFrom for Listener { + type Error = BError; + + fn try_from(io: UdpSocket) -> pingora_error::Result { + let addr = io.local_addr() + .map_err(|e| 
Error::explain( + ErrorType::SocketError, + format!("failed to get local address from socket: {}", e)))?; + let rng = SystemRandom::new(); + let key = Key::generate(ring::hmac::HMAC_SHA256, &rng) + .map_err(|e| Error::explain( + ErrorType::InternalError, + format!("failed to generate listener key: {}", e)))?; + + let settings = crate::protocols::l4::quic::settings::Settings::try_default()?; + + let gso_enabled = detect_gso(&io, MAX_IPV6_QUIC_DATAGRAM_SIZE); + let pacing_enabled = match set_txtime_sockopt(&io) { + Ok(_) => { + debug!("successfully set SO_TXTIME socket option"); + true + }, + Err(e) => { + debug!("setsockopt failed {:?}", e); + false + }, + }; + + Ok(Listener { + socket: Arc::new(io), + socket_details: SocketDetails { + addr, + gso_enabled, + pacing_enabled, + }, + + config: settings.get_config(), + crypto: Crypto { + key + }, + + connections: Default::default(), + drop_connections: Arc::new(Mutex::new(VecDeque::with_capacity(CONNECTION_DROP_DEQUE_INITIAL_SIZE))) + }) + } +} + +impl Listener { + pub(crate) async fn accept(&mut self) -> io::Result<(crate::protocols::l4::stream::Stream, SocketAddr)> { + let mut rx_buf = [0u8; MAX_IPV6_BUF_SIZE]; + + debug!("endpoint rx loop"); + 'read: loop { + // receive from network and parse Quic header + let (size, from) = match self.socket.try_recv_from(&mut rx_buf) { + Ok((size, from)) => (size, from), + Err(e) => { + if e.kind() == ErrorKind::WouldBlock { + // no more UDP packets to read for now, wait for new packets + self.socket.readable().await?; + continue 'read; + } else { + return Err(e) + } + } + }; + + // cleanup connections + { + let mut drop_conn = self.drop_connections.lock(); + while let Some(drop_id) = drop_conn.pop_front() { + match self.connections.remove(&drop_id) { + None => warn!("failed to remove connection handle {:?} from connections", drop_id), + Some(_) => debug!("removed connection handle {:?} from connections", drop_id) + } + } + } + + // parse the Quic packet's header + let header = 
match Header::from_slice(rx_buf[..size].as_mut(), quiche::MAX_CONN_ID_LEN) { + Ok(hdr) => hdr, + Err(e) => { + warn!("Parsing Quic packet header failed with error: {:?}.", e); + trace!("Dropped packet due to invalid header. Continuing..."); + continue 'read; + } + }; + + // TODO: allow for connection id updates during lifetime + // connection needs to be able to update source_ids() or destination_ids() + + let recv_info = RecvInfo { + to: self.socket_details.addr, + from, + }; + + let mut conn_id = header.dcid.clone(); + let mut udp_tx = None; + + // send to corresponding connection + let mut handle; + handle = self.connections.get_mut(&conn_id); + if handle.is_none() { + conn_id = Self::gen_cid(&self.crypto.key, &header); + handle = self.connections.get_mut(&conn_id); + }; + + trace!("connection {:?} network received from={} length={}", conn_id, from, size); + + if let Some(handle) = handle { + debug!("existing connection {:?} {:?} {:?}", conn_id, handle, header); + let mut established_handle = None; + match handle { + ConnectionHandle::Incoming(i) => { + let resp; + { + resp = i.response.lock().take(); + } + if let Some(resp) = resp { + match resp { + HandshakeResponse::Established(e) => { + debug!("connection {:?} received HandshakeResponse::Established", conn_id); + // receive data into existing connection + established_handle = Some(e); + } + HandshakeResponse::Ignored + | HandshakeResponse::Rejected => { + self.connections.remove(&header.dcid); + continue 'read + } + } + } else { + udp_tx = Some(i.udp_tx.clone()); + } + } + ConnectionHandle::Established(e) => { + // receive data into existing connection + match Self::recv_connection(&conn_id, e.connection.as_ref(), &mut rx_buf[..size], recv_info) { + Ok(_len) => { + e.rx_notify.notify_waiters(); + e.tx_notify.notify_waiters(); + continue 'read; + } + Err(e) => { + // TODO: take action on errors, e.g close connection, send & remove + break 'read Err(e); + } + } + } + } + if let Some(e) = established_handle { + 
match Self::recv_connection(&conn_id, e.connection.as_ref(), &mut rx_buf[..size], recv_info) { + Ok(_len) => { + e.rx_notify.notify_waiters(); + e.tx_notify.notify_waiters(); + // transition connection + handle.establish(e); + continue 'read; + } + Err(e) => { + // TODO: take action on errors, e.g close connection, send & remove + break 'read Err(e); + } + } + } + }; + if let Some(udp_tx) = udp_tx { + // receive data on UDP channel + match udp_tx.send(UdpRecv { + pkt: rx_buf[..size].to_vec(), + header, + recv_info, + }).await { + Ok(()) => {}, + Err(e) => warn!("sending dgram to connection {:?} failed with error: {}", conn_id, e) + } + continue 'read; + } + + + if header.ty != Type::Initial { + debug!("Quic packet type is not \"Initial\". Header: {:?}. Continuing...", header); + continue 'read; + } + + // create incoming connection & handle + let (udp_tx, udp_rx) = channel::(HANDSHAKE_PACKET_BUFFER_SIZE); + let response = Arc::new(Mutex::new(None)); + + debug!("new incoming connection {:?}", conn_id); + let connection = Connection::Incoming(IncomingState { + connection_id: conn_id.clone(), + config: self.config.clone(), + drop_connection: self.drop_connections.clone(), + + socket: self.socket.clone(), + socket_details: self.socket_details.clone(), + udp_rx, + response: response.clone(), + + dgram: UdpRecv { + pkt: rx_buf[..size].to_vec(), + header, + recv_info, + }, + + ignore: false, + reject: false, + }); + let handle = ConnectionHandle::Incoming(IncomingHandle { + udp_tx, + response, + }); + + self.connections.insert(conn_id, handle); + return Ok((connection.into(), from)) + } + } + + fn recv_connection(conn_id: &ConnectionId<'_>, conn: &Mutex, mut rx_buf: &mut [u8], recv_info: RecvInfo) -> io::Result { + let size = rx_buf.len(); + let mut conn = conn.lock(); + match conn.recv(&mut rx_buf, recv_info) { + Ok(len) => { + debug!("connection {:?} received data length={}", conn_id, len); + debug_assert_eq!(size, len, "size received on connection not equal to len 
received from network."); + Ok(len) + } + Err(e) => { + error!("connection {:?} receive error {:?}", conn_id, e); + Err(io::Error::new( + io::ErrorKind::BrokenPipe, + format!("Connection could not receive network data for {:?}. {:?}", + conn.destination_id(), e))) + } + } + } + + fn gen_cid(key: &Key, hdr: &Header) -> ConnectionId<'static> { + let conn_id = ring::hmac::sign(key, &hdr.dcid); + let conn_id = conn_id.as_ref()[..quiche::MAX_CONN_ID_LEN].to_vec(); + let conn_id = ConnectionId::from(conn_id); + trace!("generated connection id {:?}", conn_id); + conn_id + } + + pub(super) fn get_raw_fd(&self) -> RawFd { + self.socket.as_raw_fd() + } +} + +#[cfg(unix)] +impl AsRawFd for crate::protocols::l4::listener::Listener { + fn as_raw_fd(&self) -> RawFd { + match &self { + Self::Quic(l) => l.get_raw_fd(), + Self::Tcp(l) => l.as_raw_fd(), + Self::Unix(l) => l.as_raw_fd(), + } + } +} \ No newline at end of file diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index 6d72c955d..32df2ed44 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -1,35 +1,32 @@ use std::collections::{HashMap, VecDeque}; use std::{io, mem}; use std::fmt::{Debug, Formatter}; -use std::io::ErrorKind; use std::net::SocketAddr; use std::os::fd::{AsRawFd, RawFd}; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use log::{debug, error, trace, warn}; +use log::{debug, error, trace}; use parking_lot::Mutex; -use quiche::{Config, ConnectionId, Header, RecvInfo, Stats, Type}; +use quiche::{Config, ConnectionId, Header, RecvInfo, Stats}; use ring::hmac::Key; -use ring::rand::SystemRandom; use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; use tokio::net::UdpSocket; -use tokio::sync::mpsc::{channel, Receiver, Sender}; +use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::mpsc::error::TryRecvError; use tokio::sync::Notify; -use pingora_error::{BError, Error, ErrorType, OrErr, 
Result}; +use pingora_error::{Error, ErrorType, OrErr, Result}; use quiche::Connection as QuicheConnection; use tokio::task::JoinHandle; -use settings::Settings as QuicSettings; mod sendto; -mod id_token; +pub(crate) mod id_token; pub(crate) mod tls_handshake; mod settings; +mod listener; use crate::protocols::ConnectionState; -use crate::protocols::l4::quic::sendto::{detect_gso, send_to, set_txtime_sockopt}; -use crate::protocols::l4::stream::Stream as L4Stream; +use crate::protocols::l4::quic::sendto::send_to; // UDP header 8 bytes, IPv4 Header 20 bytes //pub const MAX_IPV4_BUF_SIZE: usize = 65507; @@ -68,36 +65,36 @@ pub enum Connection { } pub struct IncomingState { - connection_id: ConnectionId<'static>, - config: Arc>, - drop_connection: Arc>>>, + pub(crate) connection_id: ConnectionId<'static>, + pub(crate) config: Arc>, + pub(crate) drop_connection: Arc>>>, - socket: Arc, - socket_details: SocketDetails, - udp_rx: Receiver, - response: Arc>>, + pub(crate) socket: Arc, + pub(crate) socket_details: SocketDetails, + pub(crate) udp_rx: Receiver, + pub(crate) response: Arc>>, - dgram: UdpRecv, + pub(crate) dgram: UdpRecv, - ignore: bool, - reject: bool + pub(crate) ignore: bool, + pub(crate) reject: bool } #[derive(Clone)] -struct SocketDetails { +pub(crate) struct SocketDetails { addr: SocketAddr, gso_enabled: bool, pacing_enabled: bool, } pub struct EstablishedState { - socket: Arc, pub(crate) connection_id: ConnectionId<'static>, pub(crate) connection: Arc>, pub(crate) drop_connection: Arc>>>, pub(crate) rx_notify: Arc, pub(crate) tx_notify: Arc, - tx_handle: JoinHandle>, + pub(crate) socket: Arc, + pub(crate) tx_handle: JoinHandle>, } pub enum ConnectionHandle { @@ -129,10 +126,10 @@ pub(crate) enum HandshakeResponse { #[derive(Clone)] pub struct EstablishedHandle { - connection_id: ConnectionId<'static>, - connection: Arc>, - rx_notify: Arc, - tx_notify: Arc, + pub(crate) connection_id: ConnectionId<'static>, + pub(crate) connection: Arc>, + pub(crate) 
rx_notify: Arc, + pub(crate) tx_notify: Arc, } pub struct UdpRecv { @@ -141,257 +138,6 @@ pub struct UdpRecv { pub(crate) recv_info: RecvInfo, } -impl TryFrom for Listener { - type Error = BError; - - fn try_from(io: UdpSocket) -> Result { - let addr = io.local_addr() - .map_err(|e| Error::explain( - ErrorType::SocketError, - format!("failed to get local address from socket: {}", e)))?; - let rng = SystemRandom::new(); - let key = Key::generate(ring::hmac::HMAC_SHA256, &rng) - .map_err(|e| Error::explain( - ErrorType::InternalError, - format!("failed to generate listener key: {}", e)))?; - - let settings = QuicSettings::try_default()?; - - let gso_enabled = detect_gso(&io, MAX_IPV6_QUIC_DATAGRAM_SIZE); - let pacing_enabled = match set_txtime_sockopt(&io) { - Ok(_) => { - debug!("successfully set SO_TXTIME socket option"); - true - }, - Err(e) => { - debug!("setsockopt failed {:?}", e); - false - }, - }; - - Ok(Listener { - socket: Arc::new(io), - socket_details: SocketDetails { - addr, - gso_enabled, - pacing_enabled, - }, - - config: settings.get_config(), - crypto: Crypto { - key - }, - - connections: Default::default(), - drop_connections: Arc::new(Mutex::new(VecDeque::with_capacity(CONNECTION_DROP_DEQUE_INITIAL_SIZE))) - }) - } -} - -impl Listener { - pub(crate) async fn accept(&mut self) -> io::Result<(L4Stream, SocketAddr)> { - let mut rx_buf = [0u8; MAX_IPV6_BUF_SIZE]; - - debug!("endpoint rx loop"); - 'read: loop { - // receive from network and parse Quic header - let (size, from) = match self.socket.try_recv_from(&mut rx_buf) { - Ok((size, from)) => (size, from), - Err(e) => { - if e.kind() == ErrorKind::WouldBlock { - // no more UDP packets to read for now, wait for new packets - self.socket.readable().await?; - continue 'read; - } else { - return Err(e) - } - } - }; - - // cleanup connections - { - let mut drop_conn = self.drop_connections.lock(); - while let Some(drop_id) = drop_conn.pop_front() { - match self.connections.remove(&drop_id) { - None => 
warn!("failed to remove connection handle {:?} from connections", drop_id), - Some(_) => debug!("removed connection handle {:?} from connections", drop_id) - } - } - } - - // parse the Quic packet's header - let header = match Header::from_slice(rx_buf[..size].as_mut(), quiche::MAX_CONN_ID_LEN) { - Ok(hdr) => hdr, - Err(e) => { - warn!("Parsing Quic packet header failed with error: {:?}.", e); - trace!("Dropped packet due to invalid header. Continuing..."); - continue 'read; - } - }; - - // TODO: allow for connection id updates during lifetime - // connection needs to be able to update source_ids() or destination_ids() - - let recv_info = RecvInfo { - to: self.socket_details.addr, - from, - }; - - let mut conn_id = header.dcid.clone(); - let mut udp_tx = None; - - // send to corresponding connection - let mut handle; - handle = self.connections.get_mut(&conn_id); - if handle.is_none() { - conn_id = Self::gen_cid(&self.crypto.key, &header); - handle = self.connections.get_mut(&conn_id); - }; - - trace!("connection {:?} network received from={} length={}", conn_id, from, size); - - if let Some(handle) = handle { - debug!("existing connection {:?} {:?} {:?}", conn_id, handle, header); - let mut established_handle = None; - match handle { - ConnectionHandle::Incoming(i) => { - let resp; - { - resp = i.response.lock().take(); - } - if let Some(resp) = resp { - match resp { - HandshakeResponse::Established(e) => { - debug!("connection {:?} received HandshakeResponse::Established", conn_id); - // receive data into existing connection - established_handle = Some(e); - } - HandshakeResponse::Ignored - | HandshakeResponse::Rejected => { - self.connections.remove(&header.dcid); - continue 'read - } - } - } else { - udp_tx = Some(i.udp_tx.clone()); - } - } - ConnectionHandle::Established(e) => { - // receive data into existing connection - match Self::recv_connection(&conn_id, e.connection.as_ref(), &mut rx_buf[..size], recv_info) { - Ok(_len) => { - 
e.rx_notify.notify_waiters(); - e.tx_notify.notify_waiters(); - continue 'read; - } - Err(e) => { - // TODO: take action on errors, e.g close connection, send & remove - break 'read Err(e); - } - } - } - } - if let Some(e) = established_handle { - match Self::recv_connection(&conn_id, e.connection.as_ref(), &mut rx_buf[..size], recv_info) { - Ok(_len) => { - e.rx_notify.notify_waiters(); - e.tx_notify.notify_waiters(); - // transition connection - handle.establish(e); - continue 'read; - } - Err(e) => { - // TODO: take action on errors, e.g close connection, send & remove - break 'read Err(e); - } - } - } - }; - if let Some(udp_tx) = udp_tx { - // receive data on UDP channel - match udp_tx.send(UdpRecv { - pkt: rx_buf[..size].to_vec(), - header, - recv_info, - }).await { - Ok(()) => {}, - Err(e) => warn!("sending dgram to connection {:?} failed with error: {}", conn_id, e) - } - continue 'read; - } - - - if header.ty != Type::Initial { - debug!("Quic packet type is not \"Initial\". Header: {:?}. 
Continuing...", header); - continue 'read; - } - - // create incoming connection & handle - let (udp_tx, udp_rx) = channel::(HANDSHAKE_PACKET_BUFFER_SIZE); - let response = Arc::new(Mutex::new(None)); - - debug!("new incoming connection {:?}", conn_id); - let connection = Connection::Incoming(IncomingState { - connection_id: conn_id.clone(), - config: self.config.clone(), - drop_connection: self.drop_connections.clone(), - - socket: self.socket.clone(), - socket_details: self.socket_details.clone(), - udp_rx, - response: response.clone(), - - dgram: UdpRecv { - pkt: rx_buf[..size].to_vec(), - header, - recv_info, - }, - - ignore: false, - reject: false, - }); - let handle = ConnectionHandle::Incoming(IncomingHandle { - udp_tx, - response, - }); - - self.connections.insert(conn_id, handle); - return Ok((connection.into(), from)) - } - } - - fn recv_connection(conn_id: &ConnectionId<'_>, conn: &Mutex, mut rx_buf: &mut [u8], recv_info: RecvInfo) -> io::Result { - let size = rx_buf.len(); - let mut conn = conn.lock(); - match conn.recv(&mut rx_buf, recv_info) { - Ok(len) => { - debug!("connection {:?} received data length={}", conn_id, len); - debug_assert_eq!(size, len, "size received on connection not equal to len received from network."); - Ok(len) - } - Err(e) => { - error!("connection {:?} receive error {:?}", conn_id, e); - Err(io::Error::new( - io::ErrorKind::BrokenPipe, - format!("Connection could not receive network data for {:?}. 
{:?}", - conn.destination_id(), e))) - } - } - } - - fn gen_cid(key: &Key, hdr: &Header) -> ConnectionId<'static> { - let conn_id = ring::hmac::sign(key, &hdr.dcid); - let conn_id = conn_id.as_ref()[..quiche::MAX_CONN_ID_LEN].to_vec(); - let conn_id = ConnectionId::from(conn_id); - trace!("generated connection id {:?}", conn_id); - conn_id - } - - pub(super) fn get_raw_fd(&self) -> RawFd { - self.socket.as_raw_fd() - } -} - impl ConnectionHandle { fn establish(&mut self, handle: EstablishedHandle) { match self { @@ -405,7 +151,7 @@ impl ConnectionHandle { } impl Connection { - fn establish(&mut self, state: EstablishedState) -> Result<()> { + pub(crate) fn establish(&mut self, state: EstablishedState) -> Result<()> { if cfg!(test) { let conn = state.connection.lock(); debug_assert!(conn.is_established() || conn.is_in_early_data(), @@ -463,19 +209,19 @@ impl Drop for Connection { } } -struct ConnectionTx { - socket: Arc, - socket_details: SocketDetails, +pub(crate) struct ConnectionTx { + pub(crate) socket: Arc, + pub(crate) socket_details: SocketDetails, - connection: Arc>, - connection_id: ConnectionId<'static>, + pub(crate) connection: Arc>, + pub(crate) connection_id: ConnectionId<'static>, - tx_notify: Arc, - tx_stats: TxBurst, + pub(crate) tx_notify: Arc, + pub(crate) tx_stats: TxStats, } impl ConnectionTx { - async fn start_tx(mut self) -> Result<()> { + pub(crate) async fn start_tx(mut self) -> Result<()> { let id = self.connection_id; let mut out = [0u8;MAX_IPV6_BUF_SIZE]; @@ -570,14 +316,14 @@ impl ConnectionTx { } } -pub struct TxBurst { +pub struct TxStats { loss_rate: f64, max_send_burst: usize, max_datagram_size: usize } -impl TxBurst { - fn new(max_send_udp_payload_size: usize) -> Self { +impl TxStats { + pub(crate) fn new(max_send_udp_payload_size: usize) -> Self { Self { loss_rate: 0.0, max_send_burst: MAX_IPV6_BUF_SIZE, diff --git a/pingora-core/src/protocols/l4/quic/tls_handshake.rs b/pingora-core/src/protocols/l4/quic/tls_handshake.rs index 
a4ee250a9..e69de29bb 100644 --- a/pingora-core/src/protocols/l4/quic/tls_handshake.rs +++ b/pingora-core/src/protocols/l4/quic/tls_handshake.rs @@ -1,287 +0,0 @@ -use std::net::SocketAddr; -use std::sync::Arc; -use log::{debug, error, trace, warn}; -use parking_lot::Mutex; -use quiche::ConnectionId; -use tokio::net::UdpSocket; -use tokio::sync::Notify; -use pingora_error::{Error, ErrorType, OrErr}; -use crate::protocols::ConnectionState; -use crate::protocols::l4::quic::{Connection, ConnectionTx, EstablishedHandle, EstablishedState, HandshakeResponse, IncomingState, TxBurst, MAX_IPV6_UDP_PACKET_SIZE}; -use crate::protocols::l4::quic::id_token::{mint_token, validate_token}; -use crate::protocols::l4::stream::Stream as L4Stream; - -pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result { - let Some(connection) = stream.quic_connection_state() else { - debug_assert!(false, "quic::handshake called on stream of another type"); - return Err(Error::explain(ErrorType::InternalError, "stream is not a quic stream")) - }; - - let e_state = match connection { - Connection::Incoming(i) => { - if let Some(e_state) = handshake_inner(i).await? 
{ - // send HANDSHAKE_DONE Quic frame on established connection - e_state.tx_notify.notify_waiters(); - Some(e_state) - } else { - debug!("handshake either rejected or ignored for connection {:?}", i.connection_id); - None - } - } - Connection::Established(_) => { - debug_assert!(false, "quic::handshake on already established connection"); - return Err(Error::explain(ErrorType::HandshakeError, "handshake state not of type incoming")) - } - }; - - if let Some(e_state) = e_state { - connection.establish(e_state)?; - Ok(stream) - } else { - Err(Error::explain(ErrorType::HandshakeError, "handshake rejected or ignored")) - } -} - -async fn handshake_inner(state: &mut IncomingState) -> pingora_error::Result> { - let IncomingState { - connection_id: conn_id, - config, - drop_connection, - - socket, - socket_details, - udp_rx, - dgram, - - response, - - ignore, - reject - } = state; - - if *ignore { - { - let mut resp = response.lock(); - *resp = Some(HandshakeResponse::Ignored) - } - return Ok(None); - } else if *reject { - { - let mut resp = response.lock(); - *resp = Some(HandshakeResponse::Rejected) - } - return Ok(None); - // TODO: send to peer, return err if send fails - } - - let initial_dcid = dgram.header.dcid.clone(); - - // TODO: use correct buf sizes for IPv4 & IPv6 - // for now use IPv6 values as they are smaller, should work as well on IPv4 - let mut out = [0u8; MAX_IPV6_UDP_PACKET_SIZE]; - - if !quiche::version_is_supported(dgram.header.version) { - warn!("Quic packet version received is not supported. 
Negotiating version..."); - let size = quiche::negotiate_version(&dgram.header.scid, &dgram.header.dcid, &mut out) - .explain_err( - ErrorType::HandshakeError, |_| "creating version negotiation packet failed")?; - - // send data to network - send_dgram(conn_id, &socket, &out[..size], dgram.recv_info.from).await - .explain_err( - ErrorType::WriteError, |_| "sending version negotiation packet failed")?; - - // validate response - if let Some(resp_dgram) = udp_rx.recv().await { - if quiche::version_is_supported(resp_dgram.header.version) { - *dgram = resp_dgram - } else { - return Err(Error::explain( - ErrorType::HandshakeError, - "version negotiation failed as responded version is not supported")); - }; - } else { - return Err(Error::explain( - ErrorType::HandshakeError,"version negotiation did not receive a response")); - } - }; - - // token is always present in "Initial" packets - let token = dgram.header.token.as_ref().unwrap(); - // do stateless retry if the client didn't send a token - if token.is_empty() { - trace!("connection {:?} stateless retry as Quic header token is empty", conn_id); - - let hdr = &dgram.header; - let new_token = mint_token(&hdr, &dgram.recv_info.from); - let size = quiche::retry( - &hdr.scid, - &hdr.dcid, - &conn_id, - &new_token, - hdr.version, - &mut out, - ).explain_err(ErrorType::HandshakeError, |_| "creating retry packet failed")?; - - send_dgram(&conn_id, &socket, &out[..size], dgram.recv_info.from).await - .explain_err(ErrorType::WriteError, |_| "sending retry packet failed")?; - - // validate response - if let Some(resp_dgram) = udp_rx.recv().await { - // token is always present in "Initial" packets - let resp_token = resp_dgram.header.token.as_ref().unwrap(); - if resp_token.is_empty() { - return Err(Error::explain( - ErrorType::HandshakeError, - "Stateless retry failed. 
Still no token available after stateless retry.".to_string())); - } else { - *dgram = resp_dgram; - }; - } else { - return Err(Error::explain( - ErrorType::HandshakeError, - "Stateless retry did not receive a response.".to_string())); - } - } - - let hdr = &dgram.header; - let token = hdr.token.as_ref().unwrap(); - let odcid = validate_token(&dgram.recv_info.from, token); - - // The token was not valid, meaning the retry failed, so drop the connection. - if odcid.is_none() { - return Err(Error::explain( - ErrorType::HandshakeError, - "Quic header has invalid address validation token.".to_string())); - } - - // The destination id was not valid, so drop the connection. - if conn_id.len() != hdr.dcid.len() { - return Err(Error::explain( - ErrorType::HandshakeError, - "Quic header has invalid destination connection id.".to_string())); - } - - // Reuse the source connection ID we sent in the Retry packet, - // instead of changing it again. - debug!("new connection {:?} odcid={:?} scid={:?} ", hdr.dcid, initial_dcid, hdr.scid); - - let mut conn; - { - let mut config = config.lock(); - conn = quiche::accept(&hdr.dcid, Some(&initial_dcid), dgram.recv_info.to, dgram.recv_info.from, &mut config) - .explain_err(ErrorType::HandshakeError, |_| "connection instantiation failed")?; - } - - // receive quic data into connection - let buf = dgram.pkt.as_mut_slice(); - conn.recv(buf, dgram.recv_info) - .explain_err(ErrorType::HandshakeError, |_| "receiving initial data failed")?; - - debug!("connection {:?} starting handshake", conn_id); - // RSA handshake requires more than one packet - while !conn.is_established() { - trace!("connection {:?} creating handshake packet", conn_id); - 'tx: loop { - let (size, info) = match conn.send(out.as_mut_slice()) { - Ok((size, info)) => (size, info), - Err(quiche::Error::Done) => break 'tx, - Err(e) => return Err(e).explain_err( - ErrorType::WriteError, |_| "creating handshake packet failed"), - }; - - trace!("connection {:?} sending handshake 
packet", conn_id); - send_dgram(&conn_id, &socket, &out[..size], info.to).await - .explain_err(ErrorType::WriteError, |_| "sending handshake packet failed")?; - } - - trace!("connection {:?} waiting for handshake response", conn_id); - 'rx: loop { - if let Some(mut dgram) = udp_rx.recv().await { - trace!("connection {:?} received handshake response", conn_id); - conn.recv(dgram.pkt.as_mut_slice(), dgram.recv_info) - .explain_err( - ErrorType::HandshakeError, |_| "receiving handshake response failed")?; - } else { - return Err(Error::explain( - ErrorType::HandshakeError, - "finishing handshake failed, did not receive a response")); - } - if udp_rx.is_empty() { - break 'rx; - } - } - - trace!("connection {:?} established={}, early_data={}, closed={}, draining={}, readable={}, timed_out={}, resumed={}", - conn_id, conn.is_established(), conn.is_in_early_data(), conn.is_closed(), - conn.is_draining(), conn.is_readable(), conn.is_timed_out(), conn.is_resumed()); - - trace!("connection {:?} peer_error={:?}, local_error={:?}", conn_id, conn.peer_error(), conn.local_error()); - if let Some(e) = conn.peer_error() { - error!("connection {:?} peer error reason: {}", conn_id, String::from_utf8_lossy(e.reason.as_slice()).to_string()); - } - if let Some(e) = conn.local_error() { - error!("connection {:?} local error reason: {}", conn_id, String::from_utf8_lossy(e.reason.as_slice()).to_string()); - } - } - - let max_send_udp_payload_size = conn.max_send_udp_payload_size(); - let connection_id = conn_id; - let connection = Arc::new(Mutex::new(conn)); - let tx_notify = Arc::new(Notify::new()); - let rx_notify = Arc::new(Notify::new()); - - debug!("connection {:?} handshake successful, udp_rx {}", connection_id, udp_rx.len()); - let handle = EstablishedHandle { - connection_id: connection_id.clone(), - connection: connection.clone(), - rx_notify: rx_notify.clone(), - tx_notify: tx_notify.clone() - }; - - { - let mut resp = response.lock(); - *resp = 
Some(HandshakeResponse::Established(handle)); - } - - let tx = ConnectionTx { - socket: socket.clone(), - socket_details: socket_details.clone(), - connection_id: connection_id.clone(), - connection: connection.clone(), - - tx_notify: tx_notify.clone(), - tx_stats: TxBurst::new(max_send_udp_payload_size), - }; - - let state = EstablishedState { - socket: socket.clone(), - tx_handle: tokio::spawn(tx.start_tx()), - - connection_id: connection_id.clone(), - connection: connection.clone(), - drop_connection: drop_connection.clone(), - - rx_notify: rx_notify.clone(), - tx_notify: tx_notify.clone(), - }; - - Ok(Some(state)) -} - - -// connection io tx directly via socket -async fn send_dgram(id: &ConnectionId<'_>, io: &Arc, buf: &[u8], to: SocketAddr) -> pingora_error::Result { - match io.send_to(buf, &to).await { - Ok(sent) => { - debug_assert_eq!(sent, buf.len(), "amount of network sent data does not correspond to packet size"); - trace!("connection {:?} sent dgram to={:?} length={:?} ", id, to, buf.len()); - Ok(sent) - } - Err(e) => { - error!("Failed sending packet via UDP. Error: {:?}", e); - Err(Error::explain( - ErrorType::WriteError, format!("Failed sending packet via UDP. 
Error: {:?}", e))) - } - } -} \ No newline at end of file diff --git a/pingora-core/src/protocols/tls/mod.rs b/pingora-core/src/protocols/tls/mod.rs index 04e8fd150..69ef28e19 100644 --- a/pingora-core/src/protocols/tls/mod.rs +++ b/pingora-core/src/protocols/tls/mod.rs @@ -31,6 +31,7 @@ pub use rustls::*; #[cfg(not(feature = "any_tls"))] pub mod noop_tls; +pub(crate) mod quic; #[cfg(not(feature = "any_tls"))] pub use noop_tls::*; diff --git a/pingora-core/src/protocols/tls/quic/mod.rs b/pingora-core/src/protocols/tls/quic/mod.rs new file mode 100644 index 000000000..6663ee194 --- /dev/null +++ b/pingora-core/src/protocols/tls/quic/mod.rs @@ -0,0 +1,287 @@ +use std::net::SocketAddr; +use std::sync::Arc; +use log::{debug, error, trace, warn}; +use parking_lot::Mutex; +use quiche::ConnectionId; +use tokio::net::UdpSocket; +use tokio::sync::Notify; +use pingora_error::{Error, ErrorType, OrErr}; +use crate::protocols::ConnectionState; +use crate::protocols::l4::quic::{Connection, ConnectionTx, EstablishedHandle, EstablishedState, HandshakeResponse, IncomingState, TxStats, MAX_IPV6_UDP_PACKET_SIZE}; +use crate::protocols::l4::quic::id_token::{mint_token, validate_token}; +use crate::protocols::l4::stream::Stream as L4Stream; + +pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result { + let Some(connection) = stream.quic_connection_state() else { + debug_assert!(false, "quic::handshake called on stream of another type"); + return Err(Error::explain(ErrorType::InternalError, "stream is not a quic stream")) + }; + + let e_state = match connection { + Connection::Incoming(i) => { + if let Some(e_state) = handshake_inner(i).await? 
{ + // send HANDSHAKE_DONE Quic frame on established connection + e_state.tx_notify.notify_waiters(); + Some(e_state) + } else { + debug!("handshake either rejected or ignored for connection {:?}", i.connection_id); + None + } + } + Connection::Established(_) => { + debug_assert!(false, "quic::handshake on already established connection"); + return Err(Error::explain(ErrorType::HandshakeError, "handshake state not of type incoming")) + } + }; + + if let Some(e_state) = e_state { + connection.establish(e_state)?; + Ok(stream) + } else { + Err(Error::explain(ErrorType::HandshakeError, "handshake rejected or ignored")) + } +} + +async fn handshake_inner(state: &mut IncomingState) -> pingora_error::Result> { + let IncomingState { + connection_id: conn_id, + config, + drop_connection, + + socket, + socket_details, + udp_rx, + dgram, + + response, + + ignore, + reject + } = state; + + if *ignore { + { + let mut resp = response.lock(); + *resp = Some(HandshakeResponse::Ignored) + } + return Ok(None); + } else if *reject { + { + let mut resp = response.lock(); + *resp = Some(HandshakeResponse::Rejected) + } + return Ok(None); + // TODO: send to peer, return err if send fails + } + + let initial_dcid = dgram.header.dcid.clone(); + + // TODO: use correct buf sizes for IPv4 & IPv6 + // for now use IPv6 values as they are smaller, should work as well on IPv4 + let mut out = [0u8; MAX_IPV6_UDP_PACKET_SIZE]; + + if !quiche::version_is_supported(dgram.header.version) { + warn!("Quic packet version received is not supported. 
Negotiating version..."); + let size = quiche::negotiate_version(&dgram.header.scid, &dgram.header.dcid, &mut out) + .explain_err( + ErrorType::HandshakeError, |_| "creating version negotiation packet failed")?; + + // send data to network + send_dgram(conn_id, &socket, &out[..size], dgram.recv_info.from).await + .explain_err( + ErrorType::WriteError, |_| "sending version negotiation packet failed")?; + + // validate response + if let Some(resp_dgram) = udp_rx.recv().await { + if quiche::version_is_supported(resp_dgram.header.version) { + *dgram = resp_dgram + } else { + return Err(Error::explain( + ErrorType::HandshakeError, + "version negotiation failed as responded version is not supported")); + }; + } else { + return Err(Error::explain( + ErrorType::HandshakeError,"version negotiation did not receive a response")); + } + }; + + // token is always present in "Initial" packets + let token = dgram.header.token.as_ref().unwrap(); + // do stateless retry if the client didn't send a token + if token.is_empty() { + trace!("connection {:?} stateless retry as Quic header token is empty", conn_id); + + let hdr = &dgram.header; + let new_token = mint_token(&hdr, &dgram.recv_info.from); + let size = quiche::retry( + &hdr.scid, + &hdr.dcid, + &conn_id, + &new_token, + hdr.version, + &mut out, + ).explain_err(ErrorType::HandshakeError, |_| "creating retry packet failed")?; + + send_dgram(&conn_id, &socket, &out[..size], dgram.recv_info.from).await + .explain_err(ErrorType::WriteError, |_| "sending retry packet failed")?; + + // validate response + if let Some(resp_dgram) = udp_rx.recv().await { + // token is always present in "Initial" packets + let resp_token = resp_dgram.header.token.as_ref().unwrap(); + if resp_token.is_empty() { + return Err(Error::explain( + ErrorType::HandshakeError, + "Stateless retry failed. 
Still no token available after stateless retry.".to_string())); + } else { + *dgram = resp_dgram; + }; + } else { + return Err(Error::explain( + ErrorType::HandshakeError, + "Stateless retry did not receive a response.".to_string())); + } + } + + let hdr = &dgram.header; + let token = hdr.token.as_ref().unwrap(); + let odcid = validate_token(&dgram.recv_info.from, token); + + // The token was not valid, meaning the retry failed, so drop the connection. + if odcid.is_none() { + return Err(Error::explain( + ErrorType::HandshakeError, + "Quic header has invalid address validation token.".to_string())); + } + + // The destination id was not valid, so drop the connection. + if conn_id.len() != hdr.dcid.len() { + return Err(Error::explain( + ErrorType::HandshakeError, + "Quic header has invalid destination connection id.".to_string())); + } + + // Reuse the source connection ID we sent in the Retry packet, + // instead of changing it again. + debug!("new connection {:?} odcid={:?} scid={:?} ", hdr.dcid, initial_dcid, hdr.scid); + + let mut conn; + { + let mut config = config.lock(); + conn = quiche::accept(&hdr.dcid, Some(&initial_dcid), dgram.recv_info.to, dgram.recv_info.from, &mut config) + .explain_err(ErrorType::HandshakeError, |_| "connection instantiation failed")?; + } + + // receive quic data into connection + let buf = dgram.pkt.as_mut_slice(); + conn.recv(buf, dgram.recv_info) + .explain_err(ErrorType::HandshakeError, |_| "receiving initial data failed")?; + + debug!("connection {:?} starting handshake", conn_id); + // RSA handshake requires more than one packet + while !conn.is_established() { + trace!("connection {:?} creating handshake packet", conn_id); + 'tx: loop { + let (size, info) = match conn.send(out.as_mut_slice()) { + Ok((size, info)) => (size, info), + Err(quiche::Error::Done) => break 'tx, + Err(e) => return Err(e).explain_err( + ErrorType::WriteError, |_| "creating handshake packet failed"), + }; + + trace!("connection {:?} sending handshake 
packet", conn_id); + send_dgram(&conn_id, &socket, &out[..size], info.to).await + .explain_err(ErrorType::WriteError, |_| "sending handshake packet failed")?; + } + + trace!("connection {:?} waiting for handshake response", conn_id); + 'rx: loop { + if let Some(mut dgram) = udp_rx.recv().await { + trace!("connection {:?} received handshake response", conn_id); + conn.recv(dgram.pkt.as_mut_slice(), dgram.recv_info) + .explain_err( + ErrorType::HandshakeError, |_| "receiving handshake response failed")?; + } else { + return Err(Error::explain( + ErrorType::HandshakeError, + "finishing handshake failed, did not receive a response")); + } + if udp_rx.is_empty() { + break 'rx; + } + } + + trace!("connection {:?} established={}, early_data={}, closed={}, draining={}, readable={}, timed_out={}, resumed={}", + conn_id, conn.is_established(), conn.is_in_early_data(), conn.is_closed(), + conn.is_draining(), conn.is_readable(), conn.is_timed_out(), conn.is_resumed()); + + trace!("connection {:?} peer_error={:?}, local_error={:?}", conn_id, conn.peer_error(), conn.local_error()); + if let Some(e) = conn.peer_error() { + error!("connection {:?} peer error reason: {}", conn_id, String::from_utf8_lossy(e.reason.as_slice()).to_string()); + } + if let Some(e) = conn.local_error() { + error!("connection {:?} local error reason: {}", conn_id, String::from_utf8_lossy(e.reason.as_slice()).to_string()); + } + } + + let max_send_udp_payload_size = conn.max_send_udp_payload_size(); + let connection_id = conn_id; + let connection = Arc::new(Mutex::new(conn)); + let tx_notify = Arc::new(Notify::new()); + let rx_notify = Arc::new(Notify::new()); + + debug!("connection {:?} handshake successful, udp_rx {}", connection_id, udp_rx.len()); + let handle = EstablishedHandle { + connection_id: connection_id.clone(), + connection: connection.clone(), + rx_notify: rx_notify.clone(), + tx_notify: tx_notify.clone() + }; + + { + let mut resp = response.lock(); + *resp = 
Some(HandshakeResponse::Established(handle)); + } + + let tx = ConnectionTx { + socket: socket.clone(), + socket_details: socket_details.clone(), + connection_id: connection_id.clone(), + connection: connection.clone(), + + tx_notify: tx_notify.clone(), + tx_stats: TxStats::new(max_send_udp_payload_size), + }; + + let state = EstablishedState { + socket: socket.clone(), + tx_handle: tokio::spawn(tx.start_tx()), + + connection_id: connection_id.clone(), + connection: connection.clone(), + drop_connection: drop_connection.clone(), + + rx_notify: rx_notify.clone(), + tx_notify: tx_notify.clone(), + }; + + Ok(Some(state)) +} + + +// connection io tx directly via socket +async fn send_dgram(id: &ConnectionId<'_>, io: &Arc, buf: &[u8], to: SocketAddr) -> pingora_error::Result { + match io.send_to(buf, &to).await { + Ok(sent) => { + debug_assert_eq!(sent, buf.len(), "amount of network sent data does not correspond to packet size"); + trace!("connection {:?} sent dgram to={:?} length={:?} ", id, to, buf.len()); + Ok(sent) + } + Err(e) => { + error!("Failed sending packet via UDP. Error: {:?}", e); + Err(Error::explain( + ErrorType::WriteError, format!("Failed sending packet via UDP. 
Error: {:?}", e))) + } + } +} \ No newline at end of file diff --git a/pingora-core/tests/utils/mod.rs b/pingora-core/tests/utils/mod.rs index cc219269e..7f00fe0bc 100644 --- a/pingora-core/tests/utils/mod.rs +++ b/pingora-core/tests/utils/mod.rs @@ -104,8 +104,7 @@ fn entry_point(opt: Option) { let mut echo_service_http = Service::with_listeners("Echo Service HTTP".to_string(), listeners, EchoApp); - - echo_service_http.threads = Some(8); + echo_service_http.threads = Some(4); my_server.add_service(echo_service_http); my_server.run_forever(); From a729986a9e30eef1ac49e472628b5ad552c28bb1 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Tue, 14 Jan 2025 14:04:18 +0100 Subject: [PATCH 18/52] msrv 1.72 changes --- pingora-core/src/protocols/http/v3/server.rs | 30 +++++++++++--------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index 8308d41ef..f2bffc383 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -17,6 +17,8 @@ use std::cmp; use std::collections::VecDeque; use std::fmt::Debug; +use std::future::Future; +use std::pin::Pin; use crate::protocols::{Digest, SocketAddr, Stream}; use bytes::{BufMut, Bytes, BytesMut}; use http::uri::PathAndQuery; @@ -650,20 +652,22 @@ impl HttpSession { hconn.send_body(&mut qconn, self.stream_id, body, fin) } - async fn stream_capacity(&self, required: usize) -> quiche::Result { - let capacity; - { - let qconn = self.quic_connection.lock(); - capacity = qconn.stream_capacity(self.stream_id)?; - } + fn stream_capacity(&self, required: usize) -> Pin> + Send + '_>> { + Box::pin(async move { + let capacity; + { + let qconn = self.quic_connection.lock(); + capacity = qconn.stream_capacity(self.stream_id)?; + } - if capacity >= required { - Ok(capacity) - } else { - self.tx_notify.notify_waiters(); - self.rx_notify.notified().await; - 
Box::pin(self.stream_capacity(required)).await - } + if capacity >= required { + Ok(capacity) + } else { + self.tx_notify.notify_waiters(); + self.rx_notify.notified().await; + self.stream_capacity(required).await + } + }) } /// Write response trailers to the client, this also closes the stream. From 3f062bcc726c52322fbe8272a4496f7052603561 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Tue, 14 Jan 2025 14:05:13 +0100 Subject: [PATCH 19/52] cargo fmt & clippy --- pingora-core/Cargo.toml | 2 +- pingora-core/src/listeners/mod.rs | 2 +- pingora-core/src/modules/http/grpc_web.rs | 2 +- pingora-core/src/protocols/http/server.rs | 5 +- pingora-core/src/protocols/http/v3/mod.rs | 21 +- pingora-core/src/protocols/http/v3/nohash.rs | 2 +- pingora-core/src/protocols/http/v3/server.rs | 380 ++++++++++-------- pingora-core/src/protocols/l4/listener.rs | 4 +- .../src/protocols/l4/quic/id_token.rs | 5 +- .../src/protocols/l4/quic/listener.rs | 159 +++++--- pingora-core/src/protocols/l4/quic/mod.rs | 113 +++--- pingora-core/src/protocols/l4/quic/sendto.rs | 28 +- .../src/protocols/l4/quic/settings.rs | 18 +- .../src/protocols/l4/quic/tls_handshake.rs | 1 + pingora-core/src/protocols/l4/stream.rs | 6 +- pingora-core/src/protocols/mod.rs | 2 +- pingora-core/src/protocols/tls/quic/mod.rs | 186 ++++++--- pingora-core/tests/utils/mod.rs | 11 +- 18 files changed, 586 insertions(+), 361 deletions(-) diff --git a/pingora-core/Cargo.toml b/pingora-core/Cargo.toml index 5dc226c6f..8dfa353b8 100644 --- a/pingora-core/Cargo.toml +++ b/pingora-core/Cargo.toml @@ -89,7 +89,7 @@ hyperlocal = "0.8" jemallocator = "0.5" [features] -default = [] +default = ["boringssl"] openssl = ["pingora-openssl", "openssl_derived"] boringssl = ["pingora-boringssl", "openssl_derived", "dep:quiche", "dep:ring"] rustls = ["pingora-rustls", "any_tls", "dep:x509-parser", "ouroboros"] diff --git a/pingora-core/src/listeners/mod.rs b/pingora-core/src/listeners/mod.rs index fc3e191a1..4a358227e 100644 --- 
a/pingora-core/src/listeners/mod.rs +++ b/pingora-core/src/listeners/mod.rs @@ -22,8 +22,8 @@ pub mod tls; #[cfg(not(feature = "any_tls"))] pub use crate::tls::listeners as tls; -use crate::protocols::{tls::TlsRef, ConnectionState, Stream}; use crate::protocols::tls::quic::handshake as quic_handshake; +use crate::protocols::{tls::TlsRef, ConnectionState, Stream}; #[cfg(unix)] use crate::server::ListenFds; diff --git a/pingora-core/src/modules/http/grpc_web.rs b/pingora-core/src/modules/http/grpc_web.rs index 85b6ea646..d864d99c3 100644 --- a/pingora-core/src/modules/http/grpc_web.rs +++ b/pingora-core/src/modules/http/grpc_web.rs @@ -75,6 +75,6 @@ pub struct GrpcWeb; impl HttpModuleBuilder for GrpcWeb { fn init(&self) -> Module { - Box::new(GrpcWebBridge::default()) + Box::::default() } } diff --git a/pingora-core/src/protocols/http/server.rs b/pingora-core/src/protocols/http/server.rs index 869dee2f4..3ce3b65d9 100644 --- a/pingora-core/src/protocols/http/server.rs +++ b/pingora-core/src/protocols/http/server.rs @@ -57,10 +57,7 @@ impl Session { /// Whether the session is HTTP/3. pub fn is_http3(&self) -> bool { - match self { - Session::H3(_) => true, - _ => false, - } + matches!(self, Session::H3(_)) } /// The session HTTP version. diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index 1b64b204b..01ef7f123 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -14,15 +14,15 @@ //! HTTP/3 implementation -use std::fmt::Debug; use http::{HeaderMap, HeaderName, HeaderValue, Request, Uri, Version}; use log::warn; -use quiche::h3::{Header, NameValue}; -use pingora_http::{RequestHeader, ResponseHeader}; use pingora_error::{ErrorType, OrErr, Result}; +use pingora_http::{RequestHeader, ResponseHeader}; +use quiche::h3::{Header, NameValue}; +use std::fmt::Debug; -pub mod server; pub mod nohash; +pub mod server; pub fn event_to_request_headers(list: &Vec
) -> Result { let (mut parts, _) = Request::new(()).into_parts(); @@ -36,23 +36,24 @@ pub fn event_to_request_headers(list: &Vec
) -> Result { b":path" => uri = uri.path_and_query(h.value()), b":method" => match h.value().try_into() { Ok(v) => parts.method = v, - Err(_) => warn!("Failed to parse method from input: {:?}", h.value()) + Err(_) => warn!("Failed to parse method from input: {:?}", h.value()), }, _ => match HeaderName::from_bytes(h.name()) { Ok(k) => match HeaderValue::from_bytes(h.value()) { Ok(v) => { headers.append(k, v); - }, + } Err(_) => warn!("Failed to parse header value from input: {:?}", h.value()), }, Err(_) => warn!("Failed to parse header name input: {:?}", h.name()), - } + }, } } parts.version = Version::HTTP_3; - parts.uri = uri.build() - .explain_err(ErrorType::H3Error, |_| "failed to convert event parts to request uri")?; + parts.uri = uri.build().explain_err(ErrorType::H3Error, |_| { + "failed to convert event parts to request uri" + })?; parts.headers = headers; Ok(parts.into()) } @@ -78,4 +79,4 @@ fn header_size(headers: &[T]) -> usize { headers .iter() .fold(0, |acc, h| acc + h.value().len() + h.name().len() + 32) -} \ No newline at end of file +} diff --git a/pingora-core/src/protocols/http/v3/nohash.rs b/pingora-core/src/protocols/http/v3/nohash.rs index 357c0575d..3488e9d40 100644 --- a/pingora-core/src/protocols/http/v3/nohash.rs +++ b/pingora-core/src/protocols/http/v3/nohash.rs @@ -57,4 +57,4 @@ impl std::hash::Hasher for StreamIdHasher { type BuildStreamIdHasher = std::hash::BuildHasherDefault; pub type StreamIdHashMap = HashMap; -pub type StreamIdHashSet = HashSet; \ No newline at end of file +pub type StreamIdHashSet = HashSet; diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index f2bffc383..b8213f5ca 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -14,38 +14,40 @@ //! 
HTTP/3 server session -use std::cmp; -use std::collections::VecDeque; -use std::fmt::Debug; -use std::future::Future; -use std::pin::Pin; +use crate::protocols::http::date::get_cached_date; +use crate::protocols::http::v1::client::http_req_header_to_wire; use crate::protocols::{Digest, SocketAddr, Stream}; use bytes::{BufMut, Bytes, BytesMut}; use http::uri::PathAndQuery; use http::{header, HeaderMap, HeaderName}; -use pingora_error::{Error, ErrorType, OrErr, Result}; -use std::sync::{Arc, OnceLock}; -use std::time::{Duration, Instant}; use log::{debug, error, info, trace, warn}; use parking_lot::Mutex; -use crate::protocols::http::v1::client::http_req_header_to_wire; +use pingora_error::{Error, ErrorType, OrErr, Result}; use pingora_http::{RequestHeader, ResponseHeader}; -use crate::protocols::http::date::get_cached_date; +use std::cmp; +use std::collections::VecDeque; +use std::fmt::Debug; +use std::future::Future; +use std::pin::Pin; +use std::sync::{Arc, OnceLock}; +use std::time::{Duration, Instant}; +use crate::protocols::http::body_buffer::FixedBuffer; +use crate::protocols::http::v3::nohash::StreamIdHashMap; +use crate::protocols::http::v3::{ + event_to_request_headers, header_size, headermap_to_headervec, response_headers_to_event, +}; use crate::protocols::http::HttpTask; -pub use quiche::h3::Config as H3Options; use crate::protocols::l4::quic::{Connection, MAX_IPV6_QUIC_DATAGRAM_SIZE}; -use quiche::{h3, Connection as QuicheConnection, ConnectionId, Shutdown}; +pub use quiche::h3::Config as H3Options; use quiche::h3::{Connection as QuicheH3Connection, Event, NameValue}; -use tokio::sync::{mpsc, Notify}; +use quiche::{h3, Connection as QuicheConnection, ConnectionId, Shutdown}; use tokio::sync::mpsc::{Receiver, Sender}; -use crate::protocols::http::body_buffer::FixedBuffer; -use crate::protocols::http::v3::{event_to_request_headers, header_size, headermap_to_headervec, response_headers_to_event}; -use crate::protocols::http::v3::nohash::StreamIdHashMap; 
+use tokio::sync::{mpsc, Notify}; static H3_OPTIONS: OnceLock = OnceLock::new(); -const H3_SESSION_EVENTS_CHANNEL_SIZE : usize = 256; +const H3_SESSION_EVENTS_CHANNEL_SIZE: usize = 256; const H3_SESSION_DROP_DEQUE_INITIAL_CAPACITY: usize = 2048; const BODY_BUF_LIMIT: usize = 1024 * 64; const SHUTDOWN_GOAWAY_DRAIN_TIMEOUT: Duration = Duration::from_secs(60); @@ -60,26 +62,36 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

{ return Err(Error::explain( ErrorType::InternalError, - "connection needs to be established, invalid state")) + "connection needs to be established, invalid state", + )) } Connection::Established(state) => { let hconn = { let mut qconn = state.connection.lock(); - h3::Connection::with_transport(&mut qconn, &options).explain_err( - ErrorType::ConnectError, |_| "failed to create H3 connection")? + h3::Connection::with_transport(&mut qconn, options) + .explain_err(ErrorType::ConnectError, |_| { + "failed to create H3 connection" + })? }; state.tx_notify.notify_waiters(); - (state.connection_id.clone(), state.connection.clone(), state.drop_connection.clone(), hconn, - state.tx_notify.clone(), state.rx_notify.clone()) + ( + state.connection_id.clone(), + state.connection.clone(), + state.drop_connection.clone(), + hconn, + state.tx_notify.clone(), + state.rx_notify.clone(), + ) } }; @@ -95,7 +107,9 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

Result<()> { // send GOAWAY frame @@ -137,7 +150,8 @@ impl H3Connection { let mut hconn = self.h3_connection.lock(); debug!("H3 connection {:?} sending GoAway", self.connection_id); - hconn.send_goaway(&mut qconn, self.max_accepted_stream_id) + hconn + .send_goaway(&mut qconn, self.max_accepted_stream_id) .explain_err(ErrorType::H3Error, |_| "failed to send graceful shutdown")?; self.tx_notify.notify_waiters(); } @@ -158,14 +172,17 @@ impl H3Connection { // close quic connection { let mut qconn = self.quic_connection.lock(); - qconn.close(false, 0x00, b"graceful shutdown") + qconn + .close(false, 0x00, b"graceful shutdown") .explain_err(ErrorType::H3Error, |_| "failed to close quic connection")?; self.tx_notify.notify_waiters(); } if is_timeout { Err(Error::explain( - ErrorType::InternalError, "h3 session draining timed out with active sessions")) + ErrorType::InternalError, + "h3 session draining timed out with active sessions", + )) } else { Ok(()) } @@ -178,12 +195,16 @@ impl H3Connection { while let Some(stream_id) = drop_sessions.pop_front() { match self.sessions.remove(&stream_id) { None => { - warn!("connection {:?} failed to remove stream {} from sessions", - self.connection_id, stream_id) + warn!( + "connection {:?} failed to remove stream {} from sessions", + self.connection_id, stream_id + ) } Some(_) => { - debug!("connection {:?} stream {} removed from sessions", - self.connection_id, stream_id); + debug!( + "connection {:?} stream {} removed from sessions", + self.connection_id, stream_id + ); } }; } @@ -235,7 +256,10 @@ impl Drop for HttpSession { fn drop(&mut self) { let mut drop_sessions = self.drop_session.lock(); drop_sessions.push_back(self.stream_id); - debug!("H3 connection {:?} drop stream {}", self.connection_id, self.stream_id); + debug!( + "H3 connection {:?} drop stream {}", + self.connection_id, self.stream_id + ); } } @@ -269,7 +293,7 @@ impl HttpSession { Ok((stream_id, ev)) => { if let Some(goaway_id) = conn.received_goaway { // do 
not accept new streams, continue processing existing streams - if stream_id >= goaway_id { + if stream_id >= goaway_id { continue 'poll; } } @@ -279,10 +303,12 @@ impl HttpSession { "H3 connection {:?} stream {} forward event={:?}", conn.connection_id, stream_id, ev ); - channel.send(ev).await - .explain_err( - ErrorType::WriteError, - |e| format!("failed to send on event channel with {}", e))?; + channel + .send(ev) + .await + .explain_err(ErrorType::WriteError, |e| { + format!("failed to send on event channel with {}", e) + })?; } else { debug!( "H3 connection {:?} stream {} received event {:?}", @@ -301,11 +327,13 @@ impl HttpSession { let mut qconn = conn.quic_connection.lock(); let mut hconn = conn.h3_connection.lock(); - hconn.send_goaway(&mut qconn, conn.max_accepted_stream_id) - .explain_err( - ErrorType::InternalError, |_| "failed to send goaway")?; + hconn + .send_goaway(&mut qconn, conn.max_accepted_stream_id) + .explain_err(ErrorType::InternalError, |_| { + "failed to send goaway" + })?; conn.tx_notify.notify_waiters(); - }, + } Event::Headers { list, more_frames } => { trace!( "H3 connection {:?} request headers={:?}, more_frames={:?}", @@ -340,12 +368,16 @@ impl HttpSession { body_sent: 0, send_ended: false, - digest + digest, }; - if let Some(_) = conn.sessions.insert(stream_id, event_tx) { - debug_assert!(false, "H3 connection {:?} stream {} existing \ - session is not allowed", conn.connection_id, stream_id) + if conn.sessions.insert(stream_id, event_tx).is_some() { + debug_assert!( + false, + "H3 connection {:?} stream {} existing \ + session is not allowed", + conn.connection_id, stream_id + ) }; conn.max_accepted_stream_id = session.stream_id; @@ -365,16 +397,22 @@ impl HttpSession { let timeout_now; { let qconn = conn.quic_connection.lock(); - is_closed = qconn.is_closed() || - !(qconn.is_established() || qconn.is_in_early_data()); + is_closed = qconn.is_closed() + || !(qconn.is_established() || qconn.is_in_early_data()); if is_closed { if let 
Some(e) = qconn.peer_error() { - debug!("connection {:?} peer error reason: {}", conn.connection_id, - String::from_utf8_lossy(e.reason.as_slice()).to_string()); + debug!( + "connection {:?} peer error reason: {}", + conn.connection_id, + String::from_utf8_lossy(e.reason.as_slice()).to_string() + ); } if let Some(e) = qconn.local_error() { - debug!("connection {:?} local error reason: {}", conn.connection_id, - String::from_utf8_lossy(e.reason.as_slice()).to_string()); + debug!( + "connection {:?} local error reason: {}", + conn.connection_id, + String::from_utf8_lossy(e.reason.as_slice()).to_string() + ); } } timeout = qconn.timeout_instant(); @@ -382,15 +420,18 @@ impl HttpSession { } if is_closed { - if conn.sessions.len() > 0 { - warn!("H3 connection {:?} closed with open {} sessions", - conn.connection_id, conn.sessions.len()); + if !conn.sessions.is_empty() { + warn!( + "H3 connection {:?} closed with open {} sessions", + conn.connection_id, + conn.sessions.len() + ); } else { debug!("H3 connection {:?} closed", conn.connection_id); } conn.tx_notify.notify_waiters(); - return Ok(None) + return Ok(None); } // race for new data on connection or timeout @@ -402,26 +443,25 @@ impl HttpSession { // quiche timeout durations are sometimes 0ns, None would be expected // this can lead to premature closing of the connection // guarding with DEFAULT_CONNECTION_IDLE_TIMEOUT - if timeout.is_none() { - trace!("connection {:?} default timeout {:?}", conn.connection_id, DEFAULT_CONNECTION_IDLE_TIMEOUT); - tokio::time::sleep(DEFAULT_CONNECTION_IDLE_TIMEOUT.into()).await; - DEFAULT_CONNECTION_IDLE_TIMEOUT - } else { - if timeout < Instant::now().checked_add(DEFAULT_CONNECTION_IDLE_TIMEOUT) { + if let Some(timeout) = timeout { + if Some(timeout) < Instant::now().checked_add(DEFAULT_CONNECTION_IDLE_TIMEOUT) { trace!("connection {:?} default timeout {:?}", conn.connection_id, DEFAULT_CONNECTION_IDLE_TIMEOUT); - tokio::time::sleep(DEFAULT_CONNECTION_IDLE_TIMEOUT.into()).await; + 
tokio::time::sleep(DEFAULT_CONNECTION_IDLE_TIMEOUT).await; DEFAULT_CONNECTION_IDLE_TIMEOUT } else { - let timeout = timeout.unwrap(); let timeout_duration = timeout.duration_since(timeout_now); - tokio::time::sleep(timeout_duration.into()).await; + tokio::time::sleep(timeout_duration).await; trace!("connection {:?} timeout {:?}", conn.connection_id, timeout_duration); timeout_duration } + } else { + trace!("connection {:?} default timeout {:?}", conn.connection_id, DEFAULT_CONNECTION_IDLE_TIMEOUT); + tokio::time::sleep(DEFAULT_CONNECTION_IDLE_TIMEOUT).await; + DEFAULT_CONNECTION_IDLE_TIMEOUT } } => { conn.sessions_housekeeping().await; - if conn.sessions.len() > 0 { + if !conn.sessions.is_empty() { warn!("connection {:?} timeout {:?} reached with {} open sessions {:?}", conn.connection_id, used_timeout_duration, conn.sessions.len(), conn.sessions); } else { @@ -445,8 +485,9 @@ impl HttpSession { conn.tx_notify.notify_waiters(); error!("H3 connection closed with error {:?}.", e); - return Err(e).explain_err( - ErrorType::H3Error, |_| "while accepting new downstream requests") + return Err(e).explain_err(ErrorType::H3Error, |_| { + "while accepting new downstream requests" + }); } } } @@ -472,7 +513,7 @@ impl HttpSession { pub async fn read_body_bytes(&mut self) -> Result> { self.data_finished_event().await?; if self.read_ended { - return Ok(None) + return Ok(None); } let mut buf = [0u8; MAX_IPV6_QUIC_DATAGRAM_SIZE]; @@ -480,10 +521,14 @@ impl HttpSession { Ok(size) => size, Err(h3::Error::Done) => { trace!("recv_body done"); - return Ok(Some(BytesMut::with_capacity(0).into())) - }, - Err(e) => return Err(Error::explain( - ErrorType::ReadError, format!("reading body failed with {}", e))) + return Ok(Some(BytesMut::with_capacity(0).into())); + } + Err(e) => { + return Err(Error::explain( + ErrorType::ReadError, + format!("reading body failed with {}", e), + )) + } }; let mut data = BytesMut::with_capacity(size); @@ -499,14 +544,12 @@ impl HttpSession { Ok(Some(data)) 
} - fn recv_body(&self, out: &mut [u8]) -> h3::Result { let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); debug!( "H3 connection {:?} stream {} receiving body", - self.connection_id, - self.stream_id + self.connection_id, self.stream_id ); hconn.recv_body(&mut qconn, self.stream_id, out) } @@ -561,33 +604,30 @@ impl HttpSession { Ok(()) } - async fn send_response( - &self, - headers: &[T], - fin: bool, - ) -> Result<()> { - self.stream_capacity(header_size(headers)).await - .explain_err( - ErrorType::WriteError, - |_| format!("H3 connection {:?} failed to acquire capacity for stream {}", - self.connection_id, self.stream_id))?; + async fn send_response(&self, headers: &[T], fin: bool) -> Result<()> { + self.stream_capacity(header_size(headers)) + .await + .explain_err(ErrorType::WriteError, |_| { + format!( + "H3 connection {:?} failed to acquire capacity for stream {}", + self.connection_id, self.stream_id + ) + })?; let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); debug!( "H3 connection {:?} stream {} sending response headers={:?}, finished={}", - self.connection_id, - self.stream_id, - headers, - fin + self.connection_id, self.stream_id, headers, fin ); match hconn.send_response(&mut qconn, self.stream_id, headers, fin) { - Ok(()) => { Ok(()) }, - Err(h3::Error::Done) => { Ok(()) }, - Err(e) => Err(e).explain_err( - ErrorType::WriteError, |_| "H3 connection failed to write response"), + Ok(()) => Ok(()), + Err(h3::Error::Done) => Ok(()), + Err(e) => Err(e).explain_err(ErrorType::WriteError, |_| { + "H3 connection failed to write response" + }), } } @@ -609,26 +649,33 @@ impl HttpSession { let mut fin = end; while sent_len < data.len() { let required = cmp::min(data.len() - sent_len, MAX_IPV6_QUIC_DATAGRAM_SIZE); - let capacity = self.stream_capacity(required).await - .explain_err( - ErrorType::WriteError, - |e| format!("Failed to acquire capacity on stream id {} with {}", 
self.stream_id, e))?; - - let send; - if capacity > data.len() - sent_len { - send = &data[sent_len..data.len()]; + let capacity = + self.stream_capacity(required) + .await + .explain_err(ErrorType::WriteError, |e| { + format!( + "Failed to acquire capacity on stream id {} with {}", + self.stream_id, e + ) + })?; + + let send= if capacity > data.len() - sent_len { + &data[sent_len..data.len()] } else { - send = &data[sent_len..sent_len + capacity]; - } + &data[sent_len..sent_len + capacity] + }; fin = sent_len + send.len() == data.len() && end; match self.send_body(send, fin) { Ok(sent_size) => { debug_assert_eq!(sent_size, send.len()); sent_len += sent_size; - }, - Err(e) => return Err(e).explain_err( - ErrorType::WriteError, |_| "writing h3 response body to downstream") + } + Err(e) => { + return Err(e).explain_err(ErrorType::WriteError, |_| { + "writing h3 response body to downstream" + }) + } } } debug_assert_eq!(fin, end); @@ -646,13 +693,21 @@ impl HttpSession { let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); - debug!("H3 connection {:?} stream {} sending response body with length={:?}, finished={}", - self.connection_id, self.stream_id, body.len(), fin); + debug!( + "H3 connection {:?} stream {} sending response body with length={:?}, finished={}", + self.connection_id, + self.stream_id, + body.len(), + fin + ); hconn.send_body(&mut qconn, self.stream_id, body, fin) } - fn stream_capacity(&self, required: usize) -> Pin> + Send + '_>> { + fn stream_capacity( + &self, + required: usize, + ) -> Pin> + Send + '_>> { Box::pin(async move { let capacity; { @@ -675,13 +730,16 @@ impl HttpSession { if self.send_ended { warn!("Tried to write trailers after end of stream, dropping them"); return Ok(()); - } else if self.body_sent <= 0 { + } else if self.body_sent == 0 { return Err(Error::explain( - ErrorType::H3Error, "Trying to send trailers before body is sent.")); + ErrorType::H3Error, + "Trying to send trailers before body is 
sent.", + )); }; let headers = headermap_to_headervec(&trailers); - self.send_additional_headers(self.stream_id, headers.as_slice(), true, true).await?; + self.send_additional_headers(self.stream_id, headers.as_slice(), true, true) + .await?; // sending trailers closes the stream self.send_ended = true; @@ -696,13 +754,16 @@ impl HttpSession { is_trailer: bool, fin: bool, ) -> Result<()> { - self.stream_capacity(header_size(headers)).await - .explain_err( - ErrorType::WriteError, - |_| format!("H3 connection {:?} failed to acquire capacity for stream {}", - self.connection_id, self.stream_id))?; + self.stream_capacity(header_size(headers)) + .await + .explain_err(ErrorType::WriteError, |_| { + format!( + "H3 connection {:?} failed to acquire capacity for stream {}", + self.connection_id, self.stream_id + ) + })?; - let mut qconn = self.quic_connection.lock(); + let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); debug!( @@ -719,8 +780,9 @@ impl HttpSession { self.tx_notify.notify_waiters(); Ok(()) } - Err(e) => Err(e).explain_err( - ErrorType::WriteError, |_| "H3 connection failed to write h3 trailers to downstream"), + Err(e) => Err(e).explain_err(ErrorType::WriteError, |_| { + "H3 connection failed to write h3 trailers to downstream" + }), } } @@ -747,11 +809,10 @@ impl HttpSession { if self.response_header_written.is_some() { // use an empty data frame to signal the end - self.send_body(&[], true) - .explain_err( - ErrorType::WriteError, - |e| format! {"Writing h3 response body to downstream failed. {e}"}, - )?; + self.send_body(&[], true).explain_err( + ErrorType::WriteError, + |e| format! {"Writing h3 response body to downstream failed. {e}"}, + )?; self.tx_notify.notify_waiters(); self.send_ended = true; } @@ -768,19 +829,20 @@ impl HttpSession { Event::Finished => { trace!("stream {} event {:?}", self.stream_id, ev); self.read_ended = true; - return Ok(()) + return Ok(()); } Event::Headers { .. 
} => { debug_assert!(false, "Headers or Finished event when Data requested"); - }, + } Event::Data => { trace!("stream {} event {:?}", self.stream_id, ev); - return Ok(()) + return Ok(()); } Event::Reset(error_code) => { return Err(Error::explain( ErrorType::H3Error, - format!("stream was reset with error code {}", error_code))) + format!("stream was reset with error code {}", error_code), + )) } Event::PriorityUpdate => { // TODO: this step should be deferred until @@ -795,18 +857,21 @@ impl HttpSession { .explain_err(ErrorType::H3Error, "failed to receive priority update field value")?; */ warn!("received unhandled priority update"); - continue + continue; } Event::GoAway => { // RFC 9114 Section 5.2 & 7.2.6 warn!("received unhandled go-away"); - continue - }, + continue; + } } } - None => return Err(Error::explain( - ErrorType::ReadError, - "H3 session event channel disconnected")), + None => { + return Err(Error::explain( + ErrorType::ReadError, + "H3 session event channel disconnected", + )) + } } } } @@ -817,23 +882,19 @@ impl HttpSession { Some(ev) => { error!("reset stream {} event {:?}", self.stream_id, ev); match ev { - Event::Data | - Event::Finished | - Event::GoAway | - Event::PriorityUpdate => { - continue - } - Event::Headers { .. } => { + Event::Data | Event::Finished | Event::GoAway | Event::PriorityUpdate => { continue } - Event::Reset(error_code) => { - return Ok(error_code) - } + Event::Headers { .. } => continue, + Event::Reset(error_code) => return Ok(error_code), } } - None => return Err(Error::explain( - ErrorType::ReadError, - "H3 session event channel disconnected")), + None => { + return Err(Error::explain( + ErrorType::ReadError, + "H3 session event channel disconnected", + )) + } } } } @@ -921,7 +982,7 @@ impl HttpSession { let mut qconn = self.quic_connection.lock(); match qconn.stream_shutdown(self.stream_id, direction, error_code) { Ok(()) => self.tx_notify.notify_waiters(), - Err(e) => warn!("h3 stream {} shutdown failed. 
{:?}", self.stream_id, e) + Err(e) => warn!("h3 stream {} shutdown failed. {:?}", self.stream_id, e), } } @@ -938,7 +999,8 @@ impl HttpSession { /// Whether there is any body to read. pub fn is_body_empty(&self) -> bool { - self.request_has_body || self + self.request_has_body + || self .request_header .headers .get(header::CONTENT_LENGTH) @@ -973,8 +1035,8 @@ impl HttpSession { /// its internal tasks as the client waiting for the tasks goes away pub async fn idle(&mut self) -> Result<()> { match self.reset_event().await { - Ok(_error_code) => { Ok(()) } - Err(e) => Err(e) + Ok(_error_code) => Ok(()), + Err(e) => Err(e), } } @@ -983,8 +1045,10 @@ impl HttpSession { pub async fn read_body_or_idle(&mut self, no_body_expected: bool) -> Result> { if no_body_expected || self.is_body_done() { let reason = self.reset_event().await?; - Error::e_explain(ErrorType::H3Error, - format!("Client closed H3, reason: {reason}")) + Error::e_explain( + ErrorType::H3Error, + format!("Client closed H3, reason: {reason}"), + ) } else { self.read_body_bytes().await } @@ -1019,4 +1083,4 @@ impl HttpSession { pub fn client_addr(&self) -> Option<&SocketAddr> { self.digest.socket_digest.as_ref().map(|d| d.peer_addr())? 
} -} \ No newline at end of file +} diff --git a/pingora-core/src/protocols/l4/listener.rs b/pingora-core/src/protocols/l4/listener.rs index 7dc677279..f97208bfd 100644 --- a/pingora-core/src/protocols/l4/listener.rs +++ b/pingora-core/src/protocols/l4/listener.rs @@ -55,8 +55,6 @@ impl From for Listener { } } - - #[cfg(windows)] impl AsRawSocket for Listener { fn as_raw_socket(&self) -> std::os::windows::io::RawSocket { @@ -75,7 +73,7 @@ impl Listener { // a Quic connection supports IP address switching; // for multi-path a primary peer_addr needs to be selected l.accept().await.map(|(stream, peer_addr)| { - let mut s: Stream = stream.into(); + let mut s: Stream = stream; #[cfg(unix)] let digest = SocketDigest::from_raw_fd(s.as_raw_fd()); diff --git a/pingora-core/src/protocols/l4/quic/id_token.rs b/pingora-core/src/protocols/l4/quic/id_token.rs index b4d2a61b9..d460c323b 100644 --- a/pingora-core/src/protocols/l4/quic/id_token.rs +++ b/pingora-core/src/protocols/l4/quic/id_token.rs @@ -59,7 +59,8 @@ pub(crate) fn mint_token(hdr: &quiche::Header, src: &net::SocketAddr) -> Vec /// Note that this function is only an example and doesn't do any cryptographic /// authenticate of the token. *It should not be used in production system*. 
pub(crate) fn validate_token<'a>( - src: &net::SocketAddr, token: &'a [u8], + src: &net::SocketAddr, + token: &'a [u8], ) -> Option> { // TODO: implement token generation/validation using crypto if token.len() < 6 { @@ -82,4 +83,4 @@ pub(crate) fn validate_token<'a>( } Some(quiche::ConnectionId::from_ref(&token[addr.len()..])) -} \ No newline at end of file +} diff --git a/pingora-core/src/protocols/l4/quic/listener.rs b/pingora-core/src/protocols/l4/quic/listener.rs index b51d4943f..ac0e36cc4 100644 --- a/pingora-core/src/protocols/l4/quic/listener.rs +++ b/pingora-core/src/protocols/l4/quic/listener.rs @@ -1,19 +1,23 @@ +use crate::protocols::l4::quic::sendto::{detect_gso, set_txtime_sockopt}; +use crate::protocols::l4::quic::{ + Connection, ConnectionHandle, Crypto, HandshakeResponse, IncomingHandle, IncomingState, + Listener, SocketDetails, UdpRecv, CONNECTION_DROP_DEQUE_INITIAL_SIZE, + HANDSHAKE_PACKET_BUFFER_SIZE, MAX_IPV6_BUF_SIZE, MAX_IPV6_QUIC_DATAGRAM_SIZE, +}; +use log::{debug, error, trace, warn}; +use parking_lot::Mutex; +use pingora_error::{BError, Error, ErrorType}; +use quiche::{ConnectionId, Header, RecvInfo, Type}; +use ring::hmac::Key; +use ring::rand::SystemRandom; use std::collections::VecDeque; use std::io; use std::io::ErrorKind; use std::net::SocketAddr; use std::os::fd::{AsRawFd, RawFd}; use std::sync::Arc; -use log::{debug, error, trace, warn}; -use parking_lot::Mutex; -use quiche::{ConnectionId, Header, RecvInfo, Type}; -use ring::hmac::Key; -use ring::rand::SystemRandom; use tokio::net::UdpSocket; use tokio::sync::mpsc::channel; -use pingora_error::{BError, Error, ErrorType}; -use crate::protocols::l4::quic::{Connection, ConnectionHandle, Crypto, HandshakeResponse, IncomingHandle, IncomingState, Listener, SocketDetails, UdpRecv, CONNECTION_DROP_DEQUE_INITIAL_SIZE, HANDSHAKE_PACKET_BUFFER_SIZE, MAX_IPV6_BUF_SIZE, MAX_IPV6_QUIC_DATAGRAM_SIZE}; -use crate::protocols::l4::quic::sendto::{detect_gso, set_txtime_sockopt}; use quiche::Connection 
as QuicheConnection; @@ -21,15 +25,19 @@ impl TryFrom for Listener { type Error = BError; fn try_from(io: UdpSocket) -> pingora_error::Result { - let addr = io.local_addr() - .map_err(|e| Error::explain( + let addr = io.local_addr().map_err(|e| { + Error::explain( ErrorType::SocketError, - format!("failed to get local address from socket: {}", e)))?; + format!("failed to get local address from socket: {}", e), + ) + })?; let rng = SystemRandom::new(); - let key = Key::generate(ring::hmac::HMAC_SHA256, &rng) - .map_err(|e| Error::explain( + let key = Key::generate(ring::hmac::HMAC_SHA256, &rng).map_err(|e| { + Error::explain( ErrorType::InternalError, - format!("failed to generate listener key: {}", e)))?; + format!("failed to generate listener key: {}", e), + ) + })?; let settings = crate::protocols::l4::quic::settings::Settings::try_default()?; @@ -38,11 +46,11 @@ impl TryFrom for Listener { Ok(_) => { debug!("successfully set SO_TXTIME socket option"); true - }, + } Err(e) => { debug!("setsockopt failed {:?}", e); false - }, + } }; Ok(Listener { @@ -54,18 +62,20 @@ impl TryFrom for Listener { }, config: settings.get_config(), - crypto: Crypto { - key - }, + crypto: Crypto { key }, connections: Default::default(), - drop_connections: Arc::new(Mutex::new(VecDeque::with_capacity(CONNECTION_DROP_DEQUE_INITIAL_SIZE))) + drop_connections: Arc::new(Mutex::new(VecDeque::with_capacity( + CONNECTION_DROP_DEQUE_INITIAL_SIZE, + ))), }) } } impl Listener { - pub(crate) async fn accept(&mut self) -> io::Result<(crate::protocols::l4::stream::Stream, SocketAddr)> { + pub(crate) async fn accept( + &mut self, + ) -> io::Result<(crate::protocols::l4::stream::Stream, SocketAddr)> { let mut rx_buf = [0u8; MAX_IPV6_BUF_SIZE]; debug!("endpoint rx loop"); @@ -79,7 +89,7 @@ impl Listener { self.socket.readable().await?; continue 'read; } else { - return Err(e) + return Err(e); } } }; @@ -89,14 +99,20 @@ impl Listener { let mut drop_conn = self.drop_connections.lock(); while let 
Some(drop_id) = drop_conn.pop_front() { match self.connections.remove(&drop_id) { - None => warn!("failed to remove connection handle {:?} from connections", drop_id), - Some(_) => debug!("removed connection handle {:?} from connections", drop_id) + None => warn!( + "failed to remove connection handle {:?} from connections", + drop_id + ), + Some(_) => { + debug!("removed connection handle {:?} from connections", drop_id) + } } } } // parse the Quic packet's header - let header = match Header::from_slice(rx_buf[..size].as_mut(), quiche::MAX_CONN_ID_LEN) { + let header = match Header::from_slice(rx_buf[..size].as_mut(), quiche::MAX_CONN_ID_LEN) + { Ok(hdr) => hdr, Err(e) => { warn!("Parsing Quic packet header failed with error: {:?}.", e); @@ -124,10 +140,18 @@ impl Listener { handle = self.connections.get_mut(&conn_id); }; - trace!("connection {:?} network received from={} length={}", conn_id, from, size); + trace!( + "connection {:?} network received from={} length={}", + conn_id, + from, + size + ); if let Some(handle) = handle { - debug!("existing connection {:?} {:?} {:?}", conn_id, handle, header); + debug!( + "existing connection {:?} {:?} {:?}", + conn_id, handle, header + ); let mut established_handle = None; match handle { ConnectionHandle::Incoming(i) => { @@ -138,14 +162,16 @@ impl Listener { if let Some(resp) = resp { match resp { HandshakeResponse::Established(e) => { - debug!("connection {:?} received HandshakeResponse::Established", conn_id); + debug!( + "connection {:?} received HandshakeResponse::Established", + conn_id + ); // receive data into existing connection established_handle = Some(e); } - HandshakeResponse::Ignored - | HandshakeResponse::Rejected => { + HandshakeResponse::Ignored | HandshakeResponse::Rejected => { self.connections.remove(&header.dcid); - continue 'read + continue 'read; } } } else { @@ -154,7 +180,12 @@ impl Listener { } ConnectionHandle::Established(e) => { // receive data into existing connection - match 
Self::recv_connection(&conn_id, e.connection.as_ref(), &mut rx_buf[..size], recv_info) { + match Self::recv_connection( + &conn_id, + e.connection.as_ref(), + &mut rx_buf[..size], + recv_info, + ) { Ok(_len) => { e.rx_notify.notify_waiters(); e.tx_notify.notify_waiters(); @@ -168,7 +199,12 @@ impl Listener { } } if let Some(e) = established_handle { - match Self::recv_connection(&conn_id, e.connection.as_ref(), &mut rx_buf[..size], recv_info) { + match Self::recv_connection( + &conn_id, + e.connection.as_ref(), + &mut rx_buf[..size], + recv_info, + ) { Ok(_len) => { e.rx_notify.notify_waiters(); e.tx_notify.notify_waiters(); @@ -185,20 +221,28 @@ impl Listener { }; if let Some(udp_tx) = udp_tx { // receive data on UDP channel - match udp_tx.send(UdpRecv { - pkt: rx_buf[..size].to_vec(), - header, - recv_info, - }).await { - Ok(()) => {}, - Err(e) => warn!("sending dgram to connection {:?} failed with error: {}", conn_id, e) + match udp_tx + .send(UdpRecv { + pkt: rx_buf[..size].to_vec(), + header, + recv_info, + }) + .await + { + Ok(()) => {} + Err(e) => warn!( + "sending dgram to connection {:?} failed with error: {}", + conn_id, e + ), } continue 'read; } - if header.ty != Type::Initial { - debug!("Quic packet type is not \"Initial\". Header: {:?}. Continuing...", header); + debug!( + "Quic packet type is not \"Initial\". Header: {:?}. 
Continuing...", + header + ); continue 'read; } @@ -226,31 +270,40 @@ impl Listener { ignore: false, reject: false, }); - let handle = ConnectionHandle::Incoming(IncomingHandle { - udp_tx, - response, - }); + let handle = ConnectionHandle::Incoming(IncomingHandle { udp_tx, response }); self.connections.insert(conn_id, handle); - return Ok((connection.into(), from)) + return Ok((connection.into(), from)); } } - fn recv_connection(conn_id: &ConnectionId<'_>, conn: &Mutex, mut rx_buf: &mut [u8], recv_info: RecvInfo) -> io::Result { + fn recv_connection( + conn_id: &ConnectionId<'_>, + conn: &Mutex, + rx_buf: &mut [u8], + recv_info: RecvInfo, + ) -> io::Result { let size = rx_buf.len(); let mut conn = conn.lock(); - match conn.recv(&mut rx_buf, recv_info) { + match conn.recv(rx_buf, recv_info) { Ok(len) => { debug!("connection {:?} received data length={}", conn_id, len); - debug_assert_eq!(size, len, "size received on connection not equal to len received from network."); + debug_assert_eq!( + size, len, + "size received on connection not equal to len received from network." + ); Ok(len) } Err(e) => { error!("connection {:?} receive error {:?}", conn_id, e); Err(io::Error::new( io::ErrorKind::BrokenPipe, - format!("Connection could not receive network data for {:?}. {:?}", - conn.destination_id(), e))) + format!( + "Connection could not receive network data for {:?}. 
{:?}", + conn.destination_id(), + e + ), + )) } } } @@ -277,4 +330,4 @@ impl AsRawFd for crate::protocols::l4::listener::Listener { Self::Unix(l) => l.as_raw_fd(), } } -} \ No newline at end of file +} diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index 32df2ed44..db69193c2 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -1,32 +1,32 @@ +use log::{debug, error, trace}; +use parking_lot::Mutex; +use pingora_error::{Error, ErrorType, OrErr, Result}; +use quiche::Connection as QuicheConnection; +use quiche::{Config, ConnectionId, Header, RecvInfo, Stats}; +use ring::hmac::Key; use std::collections::{HashMap, VecDeque}; -use std::{io, mem}; use std::fmt::{Debug, Formatter}; use std::net::SocketAddr; use std::os::fd::{AsRawFd, RawFd}; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use log::{debug, error, trace}; -use parking_lot::Mutex; -use quiche::{Config, ConnectionId, Header, RecvInfo, Stats}; -use ring::hmac::Key; +use std::{io, mem}; use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; use tokio::net::UdpSocket; -use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::mpsc::error::TryRecvError; +use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::Notify; -use pingora_error::{Error, ErrorType, OrErr, Result}; -use quiche::Connection as QuicheConnection; use tokio::task::JoinHandle; -mod sendto; pub(crate) mod id_token; -pub(crate) mod tls_handshake; -mod settings; mod listener; +mod sendto; +mod settings; +pub(crate) mod tls_handshake; -use crate::protocols::ConnectionState; use crate::protocols::l4::quic::sendto::send_to; +use crate::protocols::ConnectionState; // UDP header 8 bytes, IPv4 Header 20 bytes //pub const MAX_IPV4_BUF_SIZE: usize = 65507; @@ -77,7 +77,7 @@ pub struct IncomingState { pub(crate) dgram: UdpRecv, pub(crate) ignore: bool, - pub(crate) reject: bool + pub(crate) reject: bool, } #[derive(Clone)] @@ 
-154,8 +154,10 @@ impl Connection { pub(crate) fn establish(&mut self, state: EstablishedState) -> Result<()> { if cfg!(test) { let conn = state.connection.lock(); - debug_assert!(conn.is_established() || conn.is_in_early_data(), - "connection must be established or ready for data") + debug_assert!( + conn.is_established() || conn.is_in_early_data(), + "connection must be established or ready for data" + ) } match self { Connection::Incoming(s) => { @@ -164,9 +166,13 @@ impl Connection { Ok(mut dgram) => { let mut conn = state.connection.lock(); conn.recv(dgram.pkt.as_mut_slice(), dgram.recv_info) - .explain_err( - ErrorType::HandshakeError, |_| "receiving dgram failed")?; - debug!("connection {:?} dgram received while establishing", s.connection_id) + .explain_err(ErrorType::HandshakeError, |_| { + "receiving dgram failed" + })?; + debug!( + "connection {:?} dgram received while establishing", + s.connection_id + ) } Err(e) => { match e { @@ -182,15 +188,18 @@ impl Connection { } } } - debug_assert!(s.udp_rx.is_empty(), - "udp rx channel must be empty when establishing the connection"); + debug_assert!( + s.udp_rx.is_empty(), + "udp rx channel must be empty when establishing the connection" + ); debug!("connection {:?} established", state.connection_id); let _ = mem::replace(self, Connection::Established(state)); Ok(()) } Connection::Established(_) => Err(Error::explain( ErrorType::InternalError, - "establishing connection only possible on incoming connection")) + "establishing connection only possible on incoming connection", + )), } } } @@ -223,7 +232,7 @@ pub(crate) struct ConnectionTx { impl ConnectionTx { pub(crate) async fn start_tx(mut self) -> Result<()> { let id = self.connection_id; - let mut out = [0u8;MAX_IPV6_BUF_SIZE]; + let mut out = [0u8; MAX_IPV6_BUF_SIZE]; let mut finished_sending = false; let mut continue_write = false; @@ -232,13 +241,19 @@ impl ConnectionTx { // update stats from connection let max_send_burst = { let conn = 
self.connection.lock(); - self.tx_stats.max_send_burst(conn.stats(), conn.send_quantum()) + self.tx_stats + .max_send_burst(conn.stats(), conn.send_quantum()) }; let mut total_write = 0; let mut dst_info = None; // fill tx buffer with connection data - trace!("connection {:?} total_write={}, max_send_burst={}", id, total_write, max_send_burst); + trace!( + "connection {:?} total_write={}, max_send_burst={}", + id, + total_write, + max_send_burst + ); 'fill: while total_write < max_send_burst { let send = { let mut conn = self.connection.lock(); @@ -249,7 +264,7 @@ impl ConnectionTx { Ok((size, info)) => { debug!("connection {:?} sent to={:?}, length={}", id, info.to, size); (size, info) - }, + } Err(e) => { if e == quiche::Error::Done { trace!("connection {:?} send finished", id); @@ -258,12 +273,16 @@ impl ConnectionTx { } error!("connection {:?} send error: {:?}", id, e); /* TODO: close connection - let mut conn = self.connection.lock(); - conn.close(false, 0x1, b"fail").ok(); - */ + let mut conn = self.connection.lock(); + conn.close(false, 0x1, b"fail").ok(); + */ break 'write Err(Error::explain( ErrorType::WriteError, - format!("Connection {:?} send data to network failed with {:?}", id, e))); + format!( + "Connection {:?} send data to network failed with {:?}", + id, e + ), + )); } }; @@ -273,7 +292,7 @@ impl ConnectionTx { if size < self.tx_stats.max_datagram_size { continue_write = true; - break 'fill + break 'fill; } } @@ -292,16 +311,24 @@ impl ConnectionTx { self.tx_stats.max_datagram_size, self.socket_details.pacing_enabled, self.socket_details.gso_enabled, - ).await { + ) + .await + { if e.kind() == io::ErrorKind::WouldBlock { error!("connection {:?} network socket would block", id); - continue + continue; } break 'write Err(Error::explain( ErrorType::WriteError, - format!("connection {:?} network send failed with {:?}", id, e))); + format!("connection {:?} network send failed with {:?}", id, e), + )); } - trace!("connection {:?} network sent to={} 
bytes={}", id, dst_info.to, total_write); + trace!( + "connection {:?} network sent to={} bytes={}", + id, + dst_info.to, + total_write + ); if continue_write { continue 'write; @@ -319,7 +346,7 @@ impl ConnectionTx { pub struct TxStats { loss_rate: f64, max_send_burst: usize, - max_datagram_size: usize + max_datagram_size: usize, } impl TxStats { @@ -338,13 +365,11 @@ impl TxStats { if loss_rate > self.loss_rate + 0.001 { self.max_send_burst = self.max_send_burst / 4 * 3; // Minimum bound of 10xMSS. - self.max_send_burst = - self.max_send_burst.max(self.max_datagram_size * 10); + self.max_send_burst = self.max_send_burst.max(self.max_datagram_size * 10); self.loss_rate = loss_rate; } - send_quantum.min(self.max_send_burst) / - self.max_datagram_size * self.max_datagram_size + send_quantum.min(self.max_send_burst) / self.max_datagram_size * self.max_datagram_size } } @@ -352,7 +377,7 @@ impl AsRawFd for Connection { fn as_raw_fd(&self) -> RawFd { match self { Connection::Incoming(s) => s.socket.as_raw_fd(), - Connection::Established(s) => s.socket.as_raw_fd() + Connection::Established(s) => s.socket.as_raw_fd(), } } } @@ -365,12 +390,11 @@ impl Debug for Listener { } } - impl Connection { pub(crate) fn local_addr(&self) -> io::Result { match self { Connection::Incoming(s) => s.socket.local_addr(), - Connection::Established(s) => s.socket.local_addr() + Connection::Established(s) => s.socket.local_addr(), } } } @@ -396,10 +420,7 @@ impl AsyncWrite for Connection { Poll::Ready(Ok(())) } - fn poll_shutdown( - self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll> { + fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { todo!() } } diff --git a/pingora-core/src/protocols/l4/quic/sendto.rs b/pingora-core/src/protocols/l4/quic/sendto.rs index b35d8e611..42f7a9e53 100644 --- a/pingora-core/src/protocols/l4/quic/sendto.rs +++ b/pingora-core/src/protocols/l4/quic/sendto.rs @@ -26,7 +26,7 @@ use std::cmp; use std::io; -use std::os::fd::AsRawFd; +use 
std::os::unix::io::AsRawFd; /// For Linux, try to detect GSO is available. #[cfg(target_os = "linux")] @@ -46,7 +46,9 @@ pub fn detect_gso(_socket: &mio::net::UdpSocket, _segment_size: usize) -> bool { /// Send packets using sendmsg() with GSO. #[cfg(target_os = "linux")] fn send_to_gso_pacing( - socket: &tokio::net::UdpSocket, buf: &[u8], send_info: &quiche::SendInfo, + socket: &tokio::net::UdpSocket, + buf: &[u8], + send_info: &quiche::SendInfo, segment_size: usize, ) -> io::Result { use nix::sys::socket::sendmsg; @@ -54,7 +56,6 @@ fn send_to_gso_pacing( use nix::sys::socket::MsgFlags; use nix::sys::socket::SockaddrStorage; use std::io::IoSlice; - use std::os::unix::io::AsRawFd; let iov = [IoSlice::new(buf)]; let segment_size = segment_size as u16; @@ -83,7 +84,9 @@ fn send_to_gso_pacing( /// For non-Linux platforms. #[cfg(not(target_os = "linux"))] fn send_to_gso_pacing( - _socket: &mio::net::UdpSocket, _buf: &[u8], _send_info: &quiche::SendInfo, + _socket: &mio::net::UdpSocket, + _buf: &[u8], + _send_info: &quiche::SendInfo, _segment_size: usize, ) -> io::Result { panic!("send_to_gso() should not be called on non-linux platforms"); @@ -94,11 +97,15 @@ fn send_to_gso_pacing( /// When GSO and SO_TXTIME are enabled, send packets using send_to_gso(). /// Otherwise, send packets using socket.send_to(). 
pub async fn send_to( - socket: &tokio::net::UdpSocket, buf: &[u8], send_info: &quiche::SendInfo, - segment_size: usize, pacing: bool, enable_gso: bool, + socket: &tokio::net::UdpSocket, + buf: &[u8], + send_info: &quiche::SendInfo, + segment_size: usize, + pacing: bool, + enable_gso: bool, ) -> io::Result { if pacing && enable_gso { - return send_to_gso_pacing(socket, buf, send_info, segment_size) + return send_to_gso_pacing(socket, buf, send_info, segment_size); } let mut off = 0; @@ -111,7 +118,7 @@ pub async fn send_to( match socket.send_to(&buf[off..off + pkt_len], send_info.to).await { Ok(v) => { written += v; - }, + } Err(e) => return Err(e), } @@ -126,8 +133,7 @@ pub async fn send_to( fn std_time_to_u64(time: &std::time::Instant) -> u64 { const NANOS_PER_SEC: u64 = 1_000_000_000; - const INSTANT_ZERO: std::time::Instant = - unsafe { std::mem::transmute(std::time::UNIX_EPOCH) }; + const INSTANT_ZERO: std::time::Instant = unsafe { std::mem::transmute(std::time::UNIX_EPOCH) }; let raw_time = time.duration_since(INSTANT_ZERO); @@ -156,4 +162,4 @@ pub fn set_txtime_sockopt(sock: &tokio::net::UdpSocket) -> io::Result<()> { setsockopt(sock.as_raw_fd(), TxTime, &config)?; Ok(()) -} \ No newline at end of file +} diff --git a/pingora-core/src/protocols/l4/quic/settings.rs b/pingora-core/src/protocols/l4/quic/settings.rs index c0c288ccd..2d0e2f45f 100644 --- a/pingora-core/src/protocols/l4/quic/settings.rs +++ b/pingora-core/src/protocols/l4/quic/settings.rs @@ -1,8 +1,8 @@ -use std::sync::Arc; +use crate::protocols::l4::quic::MAX_IPV6_QUIC_DATAGRAM_SIZE; use parking_lot::Mutex; -use quiche::Config; use pingora_error::{ErrorType, OrErr, Result}; -use crate::protocols::l4::quic::MAX_IPV6_QUIC_DATAGRAM_SIZE; +use quiche::Config; +use std::sync::Arc; pub struct Settings { config: Arc>, @@ -30,8 +30,8 @@ impl Settings { // config.verify_peer(); default server = false; client = true // config.discover_pmtu(false); // default false config.grease(false); // default true - 
// config.log_keys() && config.set_keylog(); // logging SSL secrets - // config.set_ticket_key() // session ticket signer key material + // config.log_keys() && config.set_keylog(); // logging SSL secrets + // config.set_ticket_key() // session ticket signer key material //config.enable_early_data(); // can lead to ZeroRTT headers during handshake @@ -78,7 +78,7 @@ impl Settings { // config.set_disable_dcid_reuse(false) // default false Ok(Self { - config: Arc::new(Mutex::new(config)) + config: Arc::new(Mutex::new(config)), }) } @@ -89,6 +89,8 @@ impl Settings { impl From for Settings { fn from(config: Config) -> Self { - Self { config: Arc::new(Mutex::new(config)) } + Self { + config: Arc::new(Mutex::new(config)), + } } -} \ No newline at end of file +} diff --git a/pingora-core/src/protocols/l4/quic/tls_handshake.rs b/pingora-core/src/protocols/l4/quic/tls_handshake.rs index e69de29bb..8b1378917 100644 --- a/pingora-core/src/protocols/l4/quic/tls_handshake.rs +++ b/pingora-core/src/protocols/l4/quic/tls_handshake.rs @@ -0,0 +1 @@ + diff --git a/pingora-core/src/protocols/l4/stream.rs b/pingora-core/src/protocols/l4/stream.rs index 6ceb2ce0e..aaa475e7d 100644 --- a/pingora-core/src/protocols/l4/stream.rs +++ b/pingora-core/src/protocols/l4/stream.rs @@ -40,8 +40,8 @@ use crate::protocols::l4::ext::{set_tcp_keepalive, TcpKeepalive}; use crate::protocols::l4::quic::Connection; use crate::protocols::raw_connect::ProxyDigest; use crate::protocols::{ - ConnectionState, GetProxyDigest, GetSocketDigest, GetTimingDigest, Peek, - Shutdown, SocketDigest, Ssl, TimingDigest, UniqueID, UniqueIDType, + ConnectionState, GetProxyDigest, GetSocketDigest, GetTimingDigest, Peek, Shutdown, + SocketDigest, Ssl, TimingDigest, UniqueID, UniqueIDType, }; use crate::upstreams::peer::Tracer; @@ -533,7 +533,7 @@ impl ConnectionState for Stream { match &self.stream.get_ref().stream { RawStream::Quic(s) => s.is_quic_connection(), RawStream::Tcp(_) => false, - RawStream::Unix(_) => false + 
RawStream::Unix(_) => false, } } } diff --git a/pingora-core/src/protocols/mod.rs b/pingora-core/src/protocols/mod.rs index 046412ae8..03b201299 100644 --- a/pingora-core/src/protocols/mod.rs +++ b/pingora-core/src/protocols/mod.rs @@ -252,6 +252,7 @@ pub(crate) trait ConnSockReusable { fn check_sock_match(&self, sock: V) -> bool; } +use crate::protocols::l4::quic::Connection; use crate::protocols::tls::TlsRef; use l4::socket::SocketAddr; use log::{debug, error}; @@ -262,7 +263,6 @@ use std::os::unix::prelude::AsRawFd; #[cfg(windows)] use std::os::windows::io::AsRawSocket; use std::{net::SocketAddr as InetSocketAddr, path::Path}; -use crate::protocols::l4::quic::Connection; #[cfg(unix)] impl ConnFdReusable for SocketAddr { diff --git a/pingora-core/src/protocols/tls/quic/mod.rs b/pingora-core/src/protocols/tls/quic/mod.rs index 6663ee194..bb3a0393d 100644 --- a/pingora-core/src/protocols/tls/quic/mod.rs +++ b/pingora-core/src/protocols/tls/quic/mod.rs @@ -1,20 +1,26 @@ -use std::net::SocketAddr; -use std::sync::Arc; +use crate::protocols::l4::quic::id_token::{mint_token, validate_token}; +use crate::protocols::l4::quic::{ + Connection, ConnectionTx, EstablishedHandle, EstablishedState, HandshakeResponse, + IncomingState, TxStats, MAX_IPV6_UDP_PACKET_SIZE, +}; +use crate::protocols::l4::stream::Stream as L4Stream; +use crate::protocols::ConnectionState; use log::{debug, error, trace, warn}; use parking_lot::Mutex; +use pingora_error::{Error, ErrorType, OrErr}; use quiche::ConnectionId; +use std::net::SocketAddr; +use std::sync::Arc; use tokio::net::UdpSocket; use tokio::sync::Notify; -use pingora_error::{Error, ErrorType, OrErr}; -use crate::protocols::ConnectionState; -use crate::protocols::l4::quic::{Connection, ConnectionTx, EstablishedHandle, EstablishedState, HandshakeResponse, IncomingState, TxStats, MAX_IPV6_UDP_PACKET_SIZE}; -use crate::protocols::l4::quic::id_token::{mint_token, validate_token}; -use crate::protocols::l4::stream::Stream as L4Stream; 
pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result { let Some(connection) = stream.quic_connection_state() else { debug_assert!(false, "quic::handshake called on stream of another type"); - return Err(Error::explain(ErrorType::InternalError, "stream is not a quic stream")) + return Err(Error::explain( + ErrorType::InternalError, + "stream is not a quic stream", + )); }; let e_state = match connection { @@ -24,13 +30,19 @@ pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result { debug_assert!(false, "quic::handshake on already established connection"); - return Err(Error::explain(ErrorType::HandshakeError, "handshake state not of type incoming")) + return Err(Error::explain( + ErrorType::HandshakeError, + "handshake state not of type incoming", + )); } }; @@ -38,11 +50,16 @@ pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result pingora_error::Result> { +async fn handshake_inner( + state: &mut IncomingState, +) -> pingora_error::Result> { let IncomingState { connection_id: conn_id, config, @@ -56,7 +73,7 @@ async fn handshake_inner(state: &mut IncomingState) -> pingora_error::Result pingora_error::Result pingora_error::Result pingora_error::Result pingora_error::Result pingora_error::Result pingora_error::Result (size, info), Err(quiche::Error::Done) => break 'tx, - Err(e) => return Err(e).explain_err( - ErrorType::WriteError, |_| "creating handshake packet failed"), + Err(e) => { + return Err(e).explain_err(ErrorType::WriteError, |_| { + "creating handshake packet failed" + }) + } }; trace!("connection {:?} sending handshake packet", conn_id); - send_dgram(&conn_id, &socket, &out[..size], info.to).await + send_dgram(conn_id, socket, &out[..size], info.to) + .await .explain_err(ErrorType::WriteError, |_| "sending handshake packet failed")?; } @@ -200,12 +252,14 @@ async fn handshake_inner(state: &mut IncomingState) -> pingora_error::Result pingora_error::Result pingora_error::Result 
pingora_error::Result, io: &Arc, buf: &[u8], to: SocketAddr) -> pingora_error::Result { +async fn send_dgram( + id: &ConnectionId<'_>, + io: &Arc, + buf: &[u8], + to: SocketAddr, +) -> pingora_error::Result { match io.send_to(buf, &to).await { Ok(sent) => { - debug_assert_eq!(sent, buf.len(), "amount of network sent data does not correspond to packet size"); - trace!("connection {:?} sent dgram to={:?} length={:?} ", id, to, buf.len()); + debug_assert_eq!( + sent, + buf.len(), + "amount of network sent data does not correspond to packet size" + ); + trace!( + "connection {:?} sent dgram to={:?} length={:?} ", + id, + to, + buf.len() + ); Ok(sent) } Err(e) => { error!("Failed sending packet via UDP. Error: {:?}", e); Err(Error::explain( - ErrorType::WriteError, format!("Failed sending packet via UDP. Error: {:?}", e))) + ErrorType::WriteError, + format!("Failed sending packet via UDP. Error: {:?}", e), + )) } } -} \ No newline at end of file +} diff --git a/pingora-core/tests/utils/mod.rs b/pingora-core/tests/utils/mod.rs index 7f00fe0bc..d19e65811 100644 --- a/pingora-core/tests/utils/mod.rs +++ b/pingora-core/tests/utils/mod.rs @@ -29,7 +29,7 @@ use std::time::Duration; use pingora_core::apps::http_app::ServeHttp; use pingora_core::protocols::http::ServerSession; -use pingora_core::protocols::l4::quic::{MAX_IPV6_BUF_SIZE}; +use pingora_core::protocols::l4::quic::MAX_IPV6_BUF_SIZE; #[derive(Clone)] pub struct EchoApp; @@ -44,7 +44,7 @@ impl ServeHttp for EchoApp { while let Ok(b) = http_stream.read_request_body().await { match b { None => break, // finished reading request - Some(b) => body.put(b) + Some(b) => body.put(b), } } if body.is_empty() { @@ -53,12 +53,7 @@ impl ServeHttp for EchoApp { body.freeze() }; - let body = match timeout( - Duration::from_millis(read_timeout), - body_future, - ) - .await - { + let body = match timeout(Duration::from_millis(read_timeout), body_future).await { Ok(res) => res, Err(_) => { panic!("Timed out after {:?}ms", 
read_timeout); From 9976d7aecba25b0162384904e25a4aa84a14529d Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Tue, 14 Jan 2025 15:07:08 +0100 Subject: [PATCH 20/52] revert changes that are no longer required to keep diff concise --- pingora-core/src/apps/http_app.rs | 13 ++++++++----- pingora-core/src/apps/mod.rs | 2 -- pingora-core/src/connectors/mod.rs | 2 -- pingora-core/src/listeners/mod.rs | 7 +++++++ pingora-core/src/protocols/http/v3/server.rs | 2 +- pingora-core/src/protocols/tls/mod.rs | 2 +- 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/pingora-core/src/apps/http_app.rs b/pingora-core/src/apps/http_app.rs index b59dfedcf..91ca58ae1 100644 --- a/pingora-core/src/apps/http_app.rs +++ b/pingora-core/src/apps/http_app.rs @@ -67,7 +67,7 @@ where return None; } } - //trace!("{:?}", http.req_header()); + trace!("{:?}", http.req_header()); if *shutdown.borrow() { http.set_keepalive(None); } else { @@ -97,10 +97,13 @@ where ), } } - http.finish().await.unwrap_or_else(|e| { - error!("HTTP server fails to finish the request: {e}"); - None - }) + match http.finish().await { + Ok(c) => c, + Err(e) => { + error!("HTTP server fails to finish the request: {e}"); + None + } + } } } diff --git a/pingora-core/src/apps/mod.rs b/pingora-core/src/apps/mod.rs index 0fc69d214..1e46562c1 100644 --- a/pingora-core/src/apps/mod.rs +++ b/pingora-core/src/apps/mod.rs @@ -58,7 +58,6 @@ pub trait ServerApp { /// This callback will be called once after the service stops listening to its endpoints. async fn cleanup(&self) {} } - #[non_exhaustive] #[derive(Default)] /// HTTP Server options that control how the server handles some transport types. @@ -238,7 +237,6 @@ where let mut shutdown = shutdown.clone(); loop { // this loop ends when the client decides to close the h3 conn - // TODO: add a timeout? let h3_stream = tokio::select! 
{ _ = shutdown.changed() => { match h3_conn.graceful_shutdown().await { diff --git a/pingora-core/src/connectors/mod.rs b/pingora-core/src/connectors/mod.rs index dd18c217e..5a126cc70 100644 --- a/pingora-core/src/connectors/mod.rs +++ b/pingora-core/src/connectors/mod.rs @@ -536,12 +536,10 @@ mod tests { assert!(!context.contains("total-connection timeout")); } - /* #[tokio::test] async fn test_do_connect_without_total_timeout() { let peer = BasicPeer::new(BLACK_HOLE); let (etype, context) = get_do_connect_failure_with_peer(&peer).await; assert!(etype != ConnectTimedout || !context.contains("total-connection timeout")); } - */ } diff --git a/pingora-core/src/listeners/mod.rs b/pingora-core/src/listeners/mod.rs index 4a358227e..bc7d3c565 100644 --- a/pingora-core/src/listeners/mod.rs +++ b/pingora-core/src/listeners/mod.rs @@ -140,6 +140,13 @@ impl Listeners { listeners } + /// Create a new [`Listeners`] with a QUIC server endpoint from the given string. + pub fn quic(&mut self, addr: &str) -> Self { + let mut listeners = Self::new(); + listeners.add_address(ServerAddress::Udp(addr.into(), None, ServerProtocol::Quic)); + listeners + } + /// Create a new [`Listeners`] with a Unix domain socket endpoint from the given string. 
#[cfg(unix)] pub fn uds(addr: &str, perm: Option) -> Self { diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index b8213f5ca..555d21306 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -659,7 +659,7 @@ impl HttpSession { ) })?; - let send= if capacity > data.len() - sent_len { + let send = if capacity > data.len() - sent_len { &data[sent_len..data.len()] } else { &data[sent_len..sent_len + capacity] diff --git a/pingora-core/src/protocols/tls/mod.rs b/pingora-core/src/protocols/tls/mod.rs index 69ef28e19..acc83bd4a 100644 --- a/pingora-core/src/protocols/tls/mod.rs +++ b/pingora-core/src/protocols/tls/mod.rs @@ -31,7 +31,7 @@ pub use rustls::*; #[cfg(not(feature = "any_tls"))] pub mod noop_tls; -pub(crate) mod quic; +pub mod quic; #[cfg(not(feature = "any_tls"))] pub use noop_tls::*; From cb93a55a6db20f361c7f8a1588a15247d761052d Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Wed, 15 Jan 2025 14:49:05 +0100 Subject: [PATCH 21/52] add Quic/Http3 test using quiche/h3i --- pingora-core/Cargo.toml | 3 +- pingora-core/tests/test_basic.rs | 98 ++++++++++++++++++++++++++++++-- 2 files changed, 96 insertions(+), 5 deletions(-) diff --git a/pingora-core/Cargo.toml b/pingora-core/Cargo.toml index 8dfa353b8..fa8133399 100644 --- a/pingora-core/Cargo.toml +++ b/pingora-core/Cargo.toml @@ -67,7 +67,7 @@ zstd = "0" httpdate = "1" x509-parser = { version = "0.16.0", optional = true } ouroboros = { version = "0.18.4", optional = true } -quiche = { git = 'https://github.com/cloudflare/quiche.git', rev = "1fd4557", optional = true } +quiche = { git = 'https://github.com/cloudflare/quiche.git', rev = "5d2031ca", optional = true } ring = { version = "0.17.8", optional = true } [target.'cfg(unix)'.dependencies] @@ -83,6 +83,7 @@ reqwest = { version = "0.11", features = [ "rustls-tls", ], default-features = false } hyper = "0.14" +h3i = { git = 
'https://github.com/cloudflare/quiche.git', rev = "5d2031ca" } [target.'cfg(unix)'.dev-dependencies] hyperlocal = "0.8" diff --git a/pingora-core/tests/test_basic.rs b/pingora-core/tests/test_basic.rs index ff3ab24da..89d3d73e7 100644 --- a/pingora-core/tests/test_basic.rs +++ b/pingora-core/tests/test_basic.rs @@ -14,8 +14,14 @@ mod utils; +use std::env; +use h3i::actions::h3::send_headers_frame; +use h3i::frame::H3iFrame; #[cfg(all(unix, feature = "any_tls"))] use hyperlocal::{UnixClientExt, Uri}; +use log::{debug, error}; +use zstd::zstd_safe::WriteBuf; +use pingora_error::{ErrorType, OrErr, Result}; #[tokio::test] async fn test_http() { @@ -62,12 +68,96 @@ async fn test_uds() { } #[tokio::test] -async fn test_udp() { +async fn test_quic_http3() -> Result<()> { use log::info; use std::time::Duration; + use h3i::actions::h3::Action; + use h3i::actions::h3::StreamEvent; + use h3i::actions::h3::StreamEventType; + use h3i::actions::h3::WaitType; + use h3i::client::sync_client; + use h3i::config::Config; + use h3i::quiche::h3::frame::Frame; + use h3i::quiche::h3::Header; + use h3i::quiche::h3::NameValue; utils::init(); - info!("Startup completed.."); - tokio::time::sleep(Duration::from_secs(3600)).await; -} + + let config = Config::new() + .with_connect_to("127.0.0.1:6147".to_string()) + .with_host_port("openrusty.org:6147".to_string()) + .with_idle_timeout(2000) + .verify_peer(false) + .build() + .unwrap(); + + let body = b"test".to_vec(); + let headers = vec![ + Header::new(b":method", b"POST"), + Header::new(b":scheme", b"https"), + Header::new(b":authority", b"openrusty.org"), + Header::new(b":path", b"/"), + Header::new(b"content-length", body.len().to_string().as_bytes()), + ]; + const STREAM_ID: u64 = 0; + let actions = vec![ + send_headers_frame(STREAM_ID, false, headers), + Action::SendFrame { + stream_id: STREAM_ID, + fin_stream: true, + frame: Frame::Data { + payload: body.clone(), + }, + }, + Action::Wait { + wait_type: WaitType::StreamEvent(StreamEvent { 
+ stream_id: STREAM_ID, + event_type: StreamEventType::Finished, + }), + }, + Action::ConnectionClose { + error: quiche::ConnectionError { + is_app: true, + error_code: quiche::h3::WireErrorCode::NoError as u64, + reason: vec![], + }, + }, + ]; + + let summary = sync_client::connect(config, &actions, None) + .explain_err(ErrorType::H3Error, |e| format!("connection failed {:?}", e))?; + + debug!("summary: {:?}", &summary); + + let stream = summary.stream_map.stream(STREAM_ID); + let resp_headers = stream.iter().find(|e| matches!(e, H3iFrame::Headers(..))).unwrap(); + let resp_body : Vec> = stream.iter() + .filter_map(|f| { + match f { + H3iFrame::QuicheH3(f) => { + match f { + Frame::Data { payload } => { + Some(payload.clone()) + } + _ => None + } + } + _ => None + } + }).collect(); + + + debug!("response headers: {:?}, body: {:?} ", &resp_headers, String::from_utf8(resp_body[0].clone())); + let headers = resp_headers.to_enriched_headers().unwrap(); + let headers = headers.header_map(); + let status = headers.get(b":status".as_slice()).unwrap(); + let content_type = headers.get(b"content-type".as_slice()).unwrap(); + let content_length = headers.get(b"content-length".as_slice()).unwrap(); + assert_eq!(status, &b"200".to_vec()); + assert_eq!(content_type, &b"text/html".to_vec()); + assert_eq!(content_length, &body.len().to_string().as_bytes().to_vec()); + assert_eq!(resp_body[0], body.as_slice().to_vec()); + + Ok(()) +} \ No newline at end of file From e95cdc73ed8b6862e7befef9bbdddfdce447a44e Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Wed, 15 Jan 2025 15:28:36 +0100 Subject: [PATCH 22/52] add timeout test, fix timeout handling --- pingora-core/src/protocols/http/v3/server.rs | 48 +++---- pingora-core/tests/test_basic.rs | 126 +++++++++++++------ 2 files changed, 104 insertions(+), 70 deletions(-) diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index 555d21306..1655006b2 100644 --- 
a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -30,7 +30,7 @@ use std::fmt::Debug; use std::future::Future; use std::pin::Pin; use std::sync::{Arc, OnceLock}; -use std::time::{Duration, Instant}; +use std::time::Duration; use crate::protocols::http::body_buffer::FixedBuffer; use crate::protocols::http::v3::nohash::StreamIdHashMap; @@ -51,7 +51,6 @@ const H3_SESSION_EVENTS_CHANNEL_SIZE: usize = 256; const H3_SESSION_DROP_DEQUE_INITIAL_CAPACITY: usize = 2048; const BODY_BUF_LIMIT: usize = 1024 * 64; const SHUTDOWN_GOAWAY_DRAIN_TIMEOUT: Duration = Duration::from_secs(60); -const DEFAULT_CONNECTION_IDLE_TIMEOUT: Duration = Duration::from_millis(1000); /// Perform HTTP/3 connection handshake with an established (QUIC) connection. /// @@ -394,7 +393,6 @@ impl HttpSession { let is_closed; let timeout; - let timeout_now; { let qconn = conn.quic_connection.lock(); is_closed = qconn.is_closed() @@ -415,8 +413,7 @@ impl HttpSession { ); } } - timeout = qconn.timeout_instant(); - timeout_now = Instant::now(); + timeout = qconn.timeout(); } if is_closed { @@ -437,43 +434,26 @@ impl HttpSession { // race for new data on connection or timeout tokio::select! 
{ _data = conn.rx_notify.notified() => {} - used_timeout_duration = async { - // FIXME: check if this is still an issue - // quiche timeout instants are on the initial calls very short - // quiche timeout durations are sometimes 0ns, None would be expected - // this can lead to premature closing of the connection - // guarding with DEFAULT_CONNECTION_IDLE_TIMEOUT + _timedout = async { if let Some(timeout) = timeout { - if Some(timeout) < Instant::now().checked_add(DEFAULT_CONNECTION_IDLE_TIMEOUT) { - trace!("connection {:?} default timeout {:?}", conn.connection_id, DEFAULT_CONNECTION_IDLE_TIMEOUT); - tokio::time::sleep(DEFAULT_CONNECTION_IDLE_TIMEOUT).await; - DEFAULT_CONNECTION_IDLE_TIMEOUT - } else { - let timeout_duration = timeout.duration_since(timeout_now); - tokio::time::sleep(timeout_duration).await; - trace!("connection {:?} timeout {:?}", conn.connection_id, timeout_duration); - timeout_duration - } + debug!("connection {:?} timeout {:?}", conn.connection_id, timeout); + tokio::time::sleep(timeout).await } else { - trace!("connection {:?} default timeout {:?}", conn.connection_id, DEFAULT_CONNECTION_IDLE_TIMEOUT); - tokio::time::sleep(DEFAULT_CONNECTION_IDLE_TIMEOUT).await; - DEFAULT_CONNECTION_IDLE_TIMEOUT + debug!("connection {:?} timeout not present", conn.connection_id); + tokio::time::sleep(Duration::MAX).await } } => { conn.sessions_housekeeping().await; if !conn.sessions.is_empty() { - warn!("connection {:?} timeout {:?} reached with {} open sessions {:?}", - conn.connection_id, used_timeout_duration, conn.sessions.len(), conn.sessions); - } else { - { - let mut qconn = conn.quic_connection.lock(); - qconn.on_timeout(); - } + warn!("connection {:?} timed out with {} open sessions", + conn.connection_id, conn.sessions.len()); + } + let mut qconn = conn.quic_connection.lock(); + // closes connection + qconn.on_timeout(); + if let Some(timeout) = timeout { debug!("connection {:?} timed out {:?}", conn.connection_id, timeout); } - - 
conn.tx_notify.notify_waiters(); - return Ok(None) } } } diff --git a/pingora-core/tests/test_basic.rs b/pingora-core/tests/test_basic.rs index 89d3d73e7..aef80ffe8 100644 --- a/pingora-core/tests/test_basic.rs +++ b/pingora-core/tests/test_basic.rs @@ -14,14 +14,22 @@ mod utils; -use std::env; -use h3i::actions::h3::send_headers_frame; -use h3i::frame::H3iFrame; #[cfg(all(unix, feature = "any_tls"))] use hyperlocal::{UnixClientExt, Uri}; -use log::{debug, error}; -use zstd::zstd_safe::WriteBuf; +use log::{debug, info}; use pingora_error::{ErrorType, OrErr, Result}; +use std::time::{Duration, Instant}; + +use h3i::actions::h3::send_headers_frame; +use h3i::actions::h3::Action; +use h3i::actions::h3::StreamEvent; +use h3i::actions::h3::StreamEventType; +use h3i::actions::h3::WaitType; +use h3i::client::sync_client; +use h3i::config::Config; +use h3i::frame::H3iFrame; +use h3i::quiche::h3::frame::Frame; +use h3i::quiche::h3::Header; #[tokio::test] async fn test_http() { @@ -69,18 +77,6 @@ async fn test_uds() { #[tokio::test] async fn test_quic_http3() -> Result<()> { - use log::info; - use std::time::Duration; - use h3i::actions::h3::Action; - use h3i::actions::h3::StreamEvent; - use h3i::actions::h3::StreamEventType; - use h3i::actions::h3::WaitType; - use h3i::client::sync_client; - use h3i::config::Config; - use h3i::quiche::h3::frame::Frame; - use h3i::quiche::h3::Header; - use h3i::quiche::h3::NameValue; - utils::init(); info!("Startup completed.."); @@ -131,24 +127,23 @@ async fn test_quic_http3() -> Result<()> { debug!("summary: {:?}", &summary); let stream = summary.stream_map.stream(STREAM_ID); - let resp_headers = stream.iter().find(|e| matches!(e, H3iFrame::Headers(..))).unwrap(); - let resp_body : Vec> = stream.iter() - .filter_map(|f| { - match f { - H3iFrame::QuicheH3(f) => { - match f { - Frame::Data { payload } => { - Some(payload.clone()) - } - _ => None - } - } - _ => None - } - }).collect(); - - - debug!("response headers: {:?}, body: {:?} ", 
&resp_headers, String::from_utf8(resp_body[0].clone())); + let resp_headers = stream + .iter() + .find(|e| matches!(e, H3iFrame::Headers(..))) + .unwrap(); + let resp_body: Vec> = stream + .iter() + .filter_map(|f| match f { + H3iFrame::QuicheH3(Frame::Data { payload }) => Some(payload.clone()), + _ => None, + }) + .collect(); + + debug!( + "response headers: {:?}, body: {:?} ", + &resp_headers, + String::from_utf8(resp_body[0].clone()) + ); let headers = resp_headers.to_enriched_headers().unwrap(); let headers = headers.header_map(); let status = headers.get(b":status".as_slice()).unwrap(); @@ -160,4 +155,63 @@ async fn test_quic_http3() -> Result<()> { assert_eq!(resp_body[0], body.as_slice().to_vec()); Ok(()) -} \ No newline at end of file +} + +#[tokio::test] +async fn test_quic_http3_timeout() -> Result<()> { + utils::init(); + info!("Startup completed.."); + + let config = Config::new() + .with_connect_to("127.0.0.1:6147".to_string()) + .with_host_port("openrusty.org:6147".to_string()) + .with_idle_timeout(3000) + .verify_peer(false) + .build() + .unwrap(); + + let body = b"test".to_vec(); + let headers = vec![ + Header::new(b":method", b"POST"), + Header::new(b":scheme", b"https"), + Header::new(b":authority", b"openrusty.org"), + Header::new(b":path", b"/"), + Header::new(b"content-length", body.len().to_string().as_bytes()), + ]; + const STREAM_ID: u64 = 0; + let actions = vec![ + send_headers_frame(STREAM_ID, false, headers), + Action::SendFrame { + stream_id: STREAM_ID, + fin_stream: true, + frame: Frame::Data { + payload: body.clone(), + }, + }, + Action::Wait { + wait_type: WaitType::StreamEvent(StreamEvent { + stream_id: STREAM_ID, + event_type: StreamEventType::Finished, + }), + }, + ]; + + let now = Instant::now(); + let summary = sync_client::connect(config.clone(), &actions, None) + .explain_err(ErrorType::H3Error, |e| format!("connection failed {:?}", e))?; + let runtime = now.elapsed(); + + assert!(runtime >= 
Duration::from_millis(config.idle_timeout)); + assert!(runtime < Duration::from_millis(config.idle_timeout + 100)); + + let stream = summary.stream_map.stream(STREAM_ID); + let resp_headers = stream + .iter() + .find(|e| matches!(e, H3iFrame::Headers(..))) + .unwrap() + .to_enriched_headers() + .unwrap(); + let status = resp_headers.status_code().unwrap(); + assert_eq!(status, &b"200".to_vec()); + Ok(()) +} From 5f28c3757dd158735dc63d817a1345f6cfd7f358 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Wed, 15 Jan 2025 17:47:39 +0100 Subject: [PATCH 23/52] refactor Quic & Http3 config handling, enable user provided configs --- pingora-core/src/listeners/l4.rs | 16 ++- pingora-core/src/listeners/mod.rs | 37 ++++- pingora-core/src/protocols/http/server.rs | 2 +- pingora-core/src/protocols/http/v3/server.rs | 14 +- .../src/protocols/l4/quic/listener.rs | 86 +++++------ pingora-core/src/protocols/l4/quic/mod.rs | 134 +++++++++++++++++- .../src/protocols/l4/quic/settings.rs | 96 ------------- pingora-core/src/protocols/tls/quic/mod.rs | 6 +- pingora-core/tests/test_basic.rs | 2 +- pingora-core/tests/utils/mod.rs | 5 +- 10 files changed, 227 insertions(+), 171 deletions(-) delete mode 100644 pingora-core/src/protocols/l4/quic/settings.rs diff --git a/pingora-core/src/listeners/l4.rs b/pingora-core/src/listeners/l4.rs index 3b7ac9b97..57c99ad5b 100644 --- a/pingora-core/src/listeners/l4.rs +++ b/pingora-core/src/listeners/l4.rs @@ -17,6 +17,7 @@ use pingora_error::{ ErrorType::{AcceptError, BindError}, OrErr, Result, }; +use std::fmt::Debug; use std::fs::Permissions; use std::io::ErrorKind; use std::net::{SocketAddr, ToSocketAddrs}; @@ -31,7 +32,7 @@ use tokio::net::{TcpSocket, UdpSocket}; use crate::protocols::l4::ext::{set_dscp, set_tcp_fastopen_backlog}; use crate::protocols::l4::listener::Listener; -use crate::protocols::l4::quic::Listener as QuicListener; +use crate::protocols::l4::quic::{Listener as QuicListener, QuicHttp3Configs}; pub use 
crate::protocols::l4::stream::Stream; use crate::protocols::TcpKeepalive; #[cfg(unix)] @@ -54,7 +55,7 @@ pub enum ServerAddress { #[derive(Clone, Debug)] pub enum ServerProtocol { // e.g. raw UDP, QUIC flavours/implementations/versions - Quic, + Quic(QuicHttp3Configs), } impl AsRef for ServerAddress { @@ -231,10 +232,10 @@ fn from_raw_fd(address: &ServerAddress, fd: i32) -> Result { let std_listener_socket = unsafe { std::net::UdpSocket::from_raw_socket(fd as u64) }; match proto { - ServerProtocol::Quic => { + ServerProtocol::Quic(conf) => { let socket = UdpSocket::from_std(std_listener_socket) .or_err_with(BindError, || format!("Listen() failed on {address:?}"))?; - Ok(QuicListener::try_from(socket)?.into()) + Ok(QuicListener::try_from((socket, conf.clone()))?.into()) } } } @@ -371,13 +372,16 @@ async fn bind(addr: &ServerAddress) -> Result { ServerAddress::Uds(l, perm) => uds::bind(l, perm.clone()), ServerAddress::Tcp(l, opt) => bind_tcp(l, opt.clone()).await, ServerAddress::Udp(l, opt, proto) => match proto { - ServerProtocol::Quic => { + ServerProtocol::Quic(conf) => { let std_socket = bind_udp_socket(l, opt.clone()) .await .or_err(BindError, "bind() failed")?; let tokio_socket = UdpSocket::try_from(std_socket) .or_err(BindError, "failed to create UdpSocket")?; - Ok(Listener::from(QuicListener::try_from(tokio_socket)?)) + Ok(Listener::from(QuicListener::try_from(( + tokio_socket, + conf.clone(), + ))?)) } }, } diff --git a/pingora-core/src/listeners/mod.rs b/pingora-core/src/listeners/mod.rs index bc7d3c565..26eb71fa8 100644 --- a/pingora-core/src/listeners/mod.rs +++ b/pingora-core/src/listeners/mod.rs @@ -35,6 +35,8 @@ use std::{fs::Permissions, sync::Arc}; use l4::{ListenerEndpoint, ServerProtocol, Stream as L4Stream}; use tls::{Acceptor, TlsSettings}; +use crate::listeners::l4::UdpSocketOptions; +use crate::protocols::l4::quic::QuicHttp3Configs; pub use crate::protocols::tls::ALPN; pub use l4::{ServerAddress, TcpSocketOptions}; @@ -140,11 +142,16 @@ impl 
Listeners { listeners } - /// Create a new [`Listeners`] with a QUIC server endpoint from the given string. - pub fn quic(&mut self, addr: &str) -> Self { + /// Create a new [`Listeners`] with a QUIC server endpoint from the given string and + /// according [`QuicHttp3Configs`]. + pub fn quic(&mut self, addr: &str, configs: QuicHttp3Configs) -> Result { let mut listeners = Self::new(); - listeners.add_address(ServerAddress::Udp(addr.into(), None, ServerProtocol::Quic)); - listeners + listeners.add_address(ServerAddress::Udp( + addr.into(), + None, + ServerProtocol::Quic(configs), + )); + Ok(listeners) } /// Create a new [`Listeners`] with a Unix domain socket endpoint from the given string. @@ -166,8 +173,26 @@ impl Listeners { } /// Add a QUIC endpoint to `self`. - pub fn add_quic(&mut self, addr: &str) { - self.add_address(ServerAddress::Udp(addr.into(), None, ServerProtocol::Quic)); + pub fn add_quic(&mut self, addr: &str, configs: QuicHttp3Configs) { + self.add_address(ServerAddress::Udp( + addr.into(), + None, + ServerProtocol::Quic(configs), + )); + } + + /// Add a QUIC endpoint to `self`, with the given [`UdpSocketOptions`]. + pub fn add_quic_with_settings( + &mut self, + addr: &str, + sock_opt: Option, + configs: QuicHttp3Configs, + ) { + self.add_address(ServerAddress::Udp( + addr.into(), + sock_opt, + ServerProtocol::Quic(configs), + )); } /// Add a TCP endpoint to `self`. 
diff --git a/pingora-core/src/protocols/http/server.rs b/pingora-core/src/protocols/http/server.rs index 3ce3b65d9..db114a3c4 100644 --- a/pingora-core/src/protocols/http/server.rs +++ b/pingora-core/src/protocols/http/server.rs @@ -265,7 +265,7 @@ impl Session { match self { Self::H1(s) => s.set_ignore_info_resp(ignore), Self::H2(_) => {} // always ignored - Self::H3(_) => {} // TODO: check if there is a need for an implementation + Self::H3(_) => {} // always ignored } } diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index 1655006b2..80f459023 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -29,7 +29,7 @@ use std::collections::VecDeque; use std::fmt::Debug; use std::future::Future; use std::pin::Pin; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use std::time::Duration; use crate::protocols::http::body_buffer::FixedBuffer; @@ -45,8 +45,6 @@ use quiche::{h3, Connection as QuicheConnection, ConnectionId, Shutdown}; use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::{mpsc, Notify}; -static H3_OPTIONS: OnceLock = OnceLock::new(); - const H3_SESSION_EVENTS_CHANNEL_SIZE: usize = 256; const H3_SESSION_DROP_DEQUE_INITIAL_CAPACITY: usize = 2048; const BODY_BUF_LIMIT: usize = 1024 * 64; @@ -57,8 +55,6 @@ const SHUTDOWN_GOAWAY_DRAIN_TIMEOUT: Duration = Duration::from_secs(60); /// The optional `options` allow to adjust certain HTTP/3 parameters and settings. /// See [`H3Options`] for more details. pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result { - let options = options.unwrap_or(H3_OPTIONS.get_or_init(|| H3Options::new().unwrap())); - let Some(conn) = io.quic_connection_state() else { return Err(Error::explain( ErrorType::ConnectError, @@ -75,8 +71,14 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

{ let hconn = { + let http3_config = if let Some(h3_options) = options { + h3_options + } else { + &state.http3_config + }; + let mut qconn = state.connection.lock(); - h3::Connection::with_transport(&mut qconn, options) + h3::Connection::with_transport(&mut qconn, http3_config) .explain_err(ErrorType::ConnectError, |_| { "failed to create H3 connection" })? diff --git a/pingora-core/src/protocols/l4/quic/listener.rs b/pingora-core/src/protocols/l4/quic/listener.rs index ac0e36cc4..e71ddeb88 100644 --- a/pingora-core/src/protocols/l4/quic/listener.rs +++ b/pingora-core/src/protocols/l4/quic/listener.rs @@ -19,12 +19,15 @@ use std::sync::Arc; use tokio::net::UdpSocket; use tokio::sync::mpsc::channel; +use crate::protocols::l4::quic::QuicHttp3Configs; use quiche::Connection as QuicheConnection; -impl TryFrom for Listener { +impl TryFrom<(UdpSocket, QuicHttp3Configs)> for Listener { type Error = BError; - fn try_from(io: UdpSocket) -> pingora_error::Result { + fn try_from( + (io, configs): (UdpSocket, QuicHttp3Configs), + ) -> pingora_error::Result { let addr = io.local_addr().map_err(|e| { Error::explain( ErrorType::SocketError, @@ -39,8 +42,6 @@ impl TryFrom for Listener { ) })?; - let settings = crate::protocols::l4::quic::settings::Settings::try_default()?; - let gso_enabled = detect_gso(&io, MAX_IPV6_QUIC_DATAGRAM_SIZE); let pacing_enabled = match set_txtime_sockopt(&io) { Ok(_) => { @@ -61,7 +62,7 @@ impl TryFrom for Listener { pacing_enabled, }, - config: settings.get_config(), + configs, crypto: Crypto { key }, connections: Default::default(), @@ -131,7 +132,7 @@ impl Listener { let mut conn_id = header.dcid.clone(); let mut udp_tx = None; - + let mut established_handle = None; // send to corresponding connection let mut handle; handle = self.connections.get_mut(&conn_id); @@ -152,7 +153,7 @@ impl Listener { "existing connection {:?} {:?} {:?}", conn_id, handle, header ); - let mut established_handle = None; + let mut needs_establish = None; match handle { 
ConnectionHandle::Incoming(i) => { let resp; @@ -166,10 +167,11 @@ impl Listener { "connection {:?} received HandshakeResponse::Established", conn_id ); - // receive data into existing connection - established_handle = Some(e); + established_handle = Some(e.clone()); + needs_establish = Some(e); } HandshakeResponse::Ignored | HandshakeResponse::Rejected => { + // drop connection self.connections.remove(&header.dcid); continue 'read; } @@ -179,48 +181,37 @@ impl Listener { } } ConnectionHandle::Established(e) => { - // receive data into existing connection - match Self::recv_connection( - &conn_id, - e.connection.as_ref(), - &mut rx_buf[..size], - recv_info, - ) { - Ok(_len) => { - e.rx_notify.notify_waiters(); - e.tx_notify.notify_waiters(); - continue 'read; - } - Err(e) => { - // TODO: take action on errors, e.g close connection, send & remove - break 'read Err(e); - } - } + established_handle = Some(e.clone()); } } - if let Some(e) = established_handle { - match Self::recv_connection( - &conn_id, - e.connection.as_ref(), - &mut rx_buf[..size], - recv_info, - ) { - Ok(_len) => { - e.rx_notify.notify_waiters(); - e.tx_notify.notify_waiters(); - // transition connection - handle.establish(e); - continue 'read; - } - Err(e) => { - // TODO: take action on errors, e.g close connection, send & remove - break 'read Err(e); - } - } + if let Some(e) = needs_establish { + handle.establish(e) } }; + + // receive data into existing connection + if let Some(e) = established_handle { + match Self::recv_connection( + &conn_id, + e.connection.as_ref(), + &mut rx_buf[..size], + recv_info, + ) { + Ok(_len) => { + e.rx_notify.notify_waiters(); + e.tx_notify.notify_waiters(); + // TODO: handle path events + continue 'read; + } + Err(e) => { + // TODO: take action on errors, e.g close connection, send & remove + break 'read Err(e); + } + } + } + + // receive data on UDP channel if let Some(udp_tx) = udp_tx { - // receive data on UDP channel match udp_tx .send(UdpRecv { pkt: 
rx_buf[..size].to_vec(), @@ -253,9 +244,10 @@ impl Listener { debug!("new incoming connection {:?}", conn_id); let connection = Connection::Incoming(IncomingState { connection_id: conn_id.clone(), - config: self.config.clone(), drop_connection: self.drop_connections.clone(), + configs: self.configs.clone(), + socket: self.socket.clone(), socket_details: self.socket_details.clone(), udp_rx, diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index db69193c2..b9b5a6fe8 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -2,7 +2,8 @@ use log::{debug, error, trace}; use parking_lot::Mutex; use pingora_error::{Error, ErrorType, OrErr, Result}; use quiche::Connection as QuicheConnection; -use quiche::{Config, ConnectionId, Header, RecvInfo, Stats}; +use quiche::{h3, Config}; +use quiche::{ConnectionId, Header, RecvInfo, Stats}; use ring::hmac::Key; use std::collections::{HashMap, VecDeque}; use std::fmt::{Debug, Formatter}; @@ -22,7 +23,6 @@ use tokio::task::JoinHandle; pub(crate) mod id_token; mod listener; mod sendto; -mod settings; pub(crate) mod tls_handshake; use crate::protocols::l4::quic::sendto::send_to; @@ -48,7 +48,7 @@ pub struct Listener { socket: Arc, socket_details: SocketDetails, - config: Arc>, + configs: QuicHttp3Configs, crypto: Crypto, connections: HashMap, ConnectionHandle>, @@ -66,7 +66,7 @@ pub enum Connection { pub struct IncomingState { pub(crate) connection_id: ConnectionId<'static>, - pub(crate) config: Arc>, + pub(crate) configs: QuicHttp3Configs, pub(crate) drop_connection: Arc>>>, pub(crate) socket: Arc, @@ -91,6 +91,7 @@ pub struct EstablishedState { pub(crate) connection_id: ConnectionId<'static>, pub(crate) connection: Arc>, pub(crate) drop_connection: Arc>>>, + pub(crate) http3_config: Arc, pub(crate) rx_notify: Arc, pub(crate) tx_notify: Arc, pub(crate) socket: Arc, @@ -445,3 +446,128 @@ impl ConnectionState for Connection { true } } + 
+#[derive(Clone)] +pub struct QuicHttp3Configs { + quic: Arc>, + http3: Arc, +} + +impl QuicHttp3Configs { + pub fn new_quic(cert_chain_pem_file: &str, priv_key_pem_file: &str) -> Result { + let mut quic = Config::new(quiche::PROTOCOL_VERSION) + .explain_err(ErrorType::InternalError, |_| { + "Failed to create quiche config." + })?; + + quic.load_cert_chain_from_pem_file(cert_chain_pem_file) + .explain_err(ErrorType::FileReadError, |_| { + "Could not load certificate chain from pem file." + })?; + + quic.load_priv_key_from_pem_file(priv_key_pem_file) + .explain_err(ErrorType::FileReadError, |_| { + "Could not load private key from pem file." + })?; + + // quic.load_verify_locations_from_file() for CA's + // quic.verify_peer(); default server = false; client = true + // quic.discover_pmtu(false); // default false + quic.grease(false); // default true + // quic.log_keys() && config.set_keylog(); // logging SSL secrets + // quic.set_ticket_key() // session ticket signer key material + + //config.enable_early_data(); // can lead to ZeroRTT headers during handshake + + quic.set_application_protos(h3::APPLICATION_PROTOCOL) + .explain_err(ErrorType::InternalError, |_| { + "Failed to set application protocols." 
+ })?; + + // quic.set_application_protos_wire_format(); + // quic.set_max_amplification_factor(3); // anti-amplification limit factor; default 3 + + quic.set_max_idle_timeout(60 * 1000); // default ulimited + quic.set_max_recv_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // recv default is 65527 + quic.set_max_send_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // send default is 1200 + quic.set_initial_max_data(10_000_000); // 10 Mb + quic.set_initial_max_stream_data_bidi_local(1_000_000); // 1 Mb + quic.set_initial_max_stream_data_bidi_remote(1_000_000); // 1 Mb + quic.set_initial_max_stream_data_uni(1_000_000); // 1 Mb + quic.set_initial_max_streams_bidi(100); + quic.set_initial_max_streams_uni(100); + + // quic.set_ack_delay_exponent(3); // default 3 + // quic.set_max_ack_delay(25); // default 25 + // quic.set_active_connection_id_limit(2); // default 2 + // quic.set_disable_active_migration(false); // default false + + // quic.set_active_connection_id_limit(2); // default 2 + // quic.set_disable_active_migration(false); // default false + // quic.set_cc_algorithm_name("cubic"); // default cubic + // quic.set_initial_congestion_window_packets(10); // default 10 + // quic.set_cc_algorithm(CongestionControlAlgorithm::CUBIC); // default CongestionControlAlgorithm::CUBIC + + // quic.enable_hystart(true); // default true + // quic.enable_pacing(true); // default true + // quic.set_max_pacing_rate(); // default ulimited + + //config.enable_dgram(false); // default false + + // quic.set_path_challenge_recv_max_queue_len(3); // default 3 + // quic.set_max_connection_window(MAX_CONNECTION_WINDOW); // default 24 Mb + // quic.set_max_stream_window(MAX_STREAM_WINDOW); // default 16 Mb + // quic.set_stateless_reset_token(None) // default None + // quic.set_disable_dcid_reuse(false) // default false + + Ok(quic) + } + + fn new_http3() -> Result { + h3::Config::new().explain_err(ErrorType::InternalError, |_| { + "failed to create new h3::Config" + }) + } + + pub fn 
from_cert_key_path(cert_chain_pem_file: &str, priv_key_pem_file: &str) -> Result { + Ok(Self { + quic: Arc::new(Mutex::new(Self::new_quic( + cert_chain_pem_file, + priv_key_pem_file, + )?)), + http3: Arc::new(Self::new_http3()?), + }) + } + + pub fn new(quic: Config, http3: h3::Config) -> Self { + Self { + quic: Arc::new(Mutex::new(quic)), + http3: Arc::new(http3), + } + } + + pub fn try_from(quic: Config) -> Result { + let http3 = h3::Config::new().explain_err(ErrorType::InternalError, |_| { + "failed to create new h3::Config" + })?; + + Ok(Self { + quic: Arc::new(Mutex::new(quic)), + http3: Arc::new(http3), + }) + } + + pub fn quic(&self) -> &Arc> { + &self.quic + } + pub fn http3(&self) -> &Arc { + &self.http3 + } +} + +impl Debug for QuicHttp3Configs { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let mut dbg = f.debug_struct("Configs"); + dbg.finish() + } +} diff --git a/pingora-core/src/protocols/l4/quic/settings.rs b/pingora-core/src/protocols/l4/quic/settings.rs deleted file mode 100644 index 2d0e2f45f..000000000 --- a/pingora-core/src/protocols/l4/quic/settings.rs +++ /dev/null @@ -1,96 +0,0 @@ -use crate::protocols::l4::quic::MAX_IPV6_QUIC_DATAGRAM_SIZE; -use parking_lot::Mutex; -use pingora_error::{ErrorType, OrErr, Result}; -use quiche::Config; -use std::sync::Arc; - -pub struct Settings { - config: Arc>, -} - -impl Settings { - pub(crate) fn try_default() -> Result { - // TODO: use pingora config values where possible - // enable user provided default config - - let mut config = Config::new(quiche::PROTOCOL_VERSION) - .explain_err(ErrorType::InternalError, |_| { - "Failed to create quiche config." 
- })?; - - config - .load_cert_chain_from_pem_file("/home/hargut/Sources/github.com/pingora/pingora-proxy/tests/utils/conf/keys/server_rustls.crt") - .explain_err(ErrorType::FileReadError, |_| "Could not load certificate chain from pem file.")?; - - config - .load_priv_key_from_pem_file("/home/hargut/Sources/github.com/pingora/pingora-proxy/tests/utils/conf/keys/key.pem") - .explain_err(ErrorType::FileReadError, |_| "Could not load private key from pem file.")?; - - // config.load_verify_locations_from_file() for CA's - // config.verify_peer(); default server = false; client = true - // config.discover_pmtu(false); // default false - config.grease(false); // default true - // config.log_keys() && config.set_keylog(); // logging SSL secrets - // config.set_ticket_key() // session ticket signer key material - - //config.enable_early_data(); // can lead to ZeroRTT headers during handshake - - config - .set_application_protos(quiche::h3::APPLICATION_PROTOCOL) - .explain_err(ErrorType::InternalError, |_| { - "Failed to set application protocols." 
- })?; - - // config.set_application_protos_wire_format(); - // config.set_max_amplification_factor(3); // anti-amplification limit factor; default 3 - - config.set_max_idle_timeout(60 * 1000); // default ulimited - config.set_max_recv_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // recv default is 65527 - config.set_max_send_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // send default is 1200 - config.set_initial_max_data(10_000_000); // 10 Mb - config.set_initial_max_stream_data_bidi_local(1_000_000); // 1 Mb - config.set_initial_max_stream_data_bidi_remote(1_000_000); // 1 Mb - config.set_initial_max_stream_data_uni(1_000_000); // 1 Mb - config.set_initial_max_streams_bidi(100); - config.set_initial_max_streams_uni(100); - - // config.set_ack_delay_exponent(3); // default 3 - // config.set_max_ack_delay(25); // default 25 - // config.set_active_connection_id_limit(2); // default 2 - // config.set_disable_active_migration(false); // default false - - // config.set_active_connection_id_limit(2); // default 2 - // config.set_disable_active_migration(false); // default false - // config.set_cc_algorithm_name("cubic"); // default cubic - // config.set_initial_congestion_window_packets(10); // default 10 - // config.set_cc_algorithm(CongestionControlAlgorithm::CUBIC); // default CongestionControlAlgorithm::CUBIC - - // config.enable_hystart(true); // default true - // config.enable_pacing(true); // default true - // config.set_max_pacing_rate(); // default ulimited - - //config.enable_dgram(false); // default false - - // config.set_path_challenge_recv_max_queue_len(3); // default 3 - // config.set_max_connection_window(MAX_CONNECTION_WINDOW); // default 24 Mb - // config.set_max_stream_window(MAX_STREAM_WINDOW); // default 16 Mb - // config.set_stateless_reset_token(None) // default None - // config.set_disable_dcid_reuse(false) // default false - - Ok(Self { - config: Arc::new(Mutex::new(config)), - }) - } - - pub(crate) fn get_config(&self) -> Arc> { - 
self.config.clone() - } -} - -impl From for Settings { - fn from(config: Config) -> Self { - Self { - config: Arc::new(Mutex::new(config)), - } - } -} diff --git a/pingora-core/src/protocols/tls/quic/mod.rs b/pingora-core/src/protocols/tls/quic/mod.rs index bb3a0393d..34b8c00b2 100644 --- a/pingora-core/src/protocols/tls/quic/mod.rs +++ b/pingora-core/src/protocols/tls/quic/mod.rs @@ -62,7 +62,7 @@ async fn handshake_inner( ) -> pingora_error::Result> { let IncomingState { connection_id: conn_id, - config, + configs, drop_connection, socket, @@ -206,7 +206,7 @@ async fn handshake_inner( let mut conn; { - let mut config = config.lock(); + let mut config = configs.quic().lock(); conn = quiche::accept( &hdr.dcid, Some(&initial_dcid), @@ -333,6 +333,8 @@ async fn handshake_inner( connection: connection.clone(), drop_connection: drop_connection.clone(), + http3_config: configs.http3().clone(), + rx_notify: rx_notify.clone(), tx_notify: tx_notify.clone(), }; diff --git a/pingora-core/tests/test_basic.rs b/pingora-core/tests/test_basic.rs index aef80ffe8..19e39d863 100644 --- a/pingora-core/tests/test_basic.rs +++ b/pingora-core/tests/test_basic.rs @@ -165,7 +165,7 @@ async fn test_quic_http3_timeout() -> Result<()> { let config = Config::new() .with_connect_to("127.0.0.1:6147".to_string()) .with_host_port("openrusty.org:6147".to_string()) - .with_idle_timeout(3000) + .with_idle_timeout(60000) .verify_peer(false) .build() .unwrap(); diff --git a/pingora-core/tests/utils/mod.rs b/pingora-core/tests/utils/mod.rs index d19e65811..ba0979ba1 100644 --- a/pingora-core/tests/utils/mod.rs +++ b/pingora-core/tests/utils/mod.rs @@ -29,7 +29,7 @@ use std::time::Duration; use pingora_core::apps::http_app::ServeHttp; use pingora_core::protocols::http::ServerSession; -use pingora_core::protocols::l4::quic::MAX_IPV6_BUF_SIZE; +use pingora_core::protocols::l4::quic::{QuicHttp3Configs, MAX_IPV6_BUF_SIZE}; #[derive(Clone)] pub struct EchoApp; @@ -95,7 +95,8 @@ fn entry_point(opt: Option) { 
tls_settings.enable_h2(); listeners.add_tls_with_settings("0.0.0.0:6146", None, tls_settings); - listeners.add_quic("0.0.0.0:6147"); + let configs = QuicHttp3Configs::from_cert_key_path(&cert_path, &key_path).unwrap(); + listeners.add_quic("0.0.0.0:6147", configs); let mut echo_service_http = Service::with_listeners("Echo Service HTTP".to_string(), listeners, EchoApp); From 28773f55155bb02ecdcb879a1f3e3af92cef9bce Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Wed, 15 Jan 2025 19:06:01 +0100 Subject: [PATCH 24/52] allow building using rustls & quic-boringssl features add some documentations move socket to socket_details remove HandshakeResponse::Rejected --- pingora-core/Cargo.toml | 3 +- pingora-core/src/apps/mod.rs | 2 +- pingora-core/src/listeners/mod.rs | 5 +- pingora-core/src/protocols/http/server.rs | 2 +- pingora-core/src/protocols/http/v3/mod.rs | 2 +- pingora-core/src/protocols/http/v3/server.rs | 30 ++-- pingora-core/src/protocols/l4/listener.rs | 12 ++ .../src/protocols/l4/quic/listener.rs | 150 ++++++++++-------- pingora-core/src/protocols/l4/quic/mod.rs | 117 ++++++++------ .../src/protocols/l4/quic/tls_handshake.rs | 1 - pingora-core/src/protocols/tls/quic/mod.rs | 26 +-- pingora-core/tests/test_basic.rs | 2 +- pingora-core/tests/utils/mod.rs | 2 +- 13 files changed, 191 insertions(+), 163 deletions(-) delete mode 100644 pingora-core/src/protocols/l4/quic/tls_handshake.rs diff --git a/pingora-core/Cargo.toml b/pingora-core/Cargo.toml index fa8133399..a134e421c 100644 --- a/pingora-core/Cargo.toml +++ b/pingora-core/Cargo.toml @@ -92,9 +92,10 @@ jemallocator = "0.5" [features] default = ["boringssl"] openssl = ["pingora-openssl", "openssl_derived"] -boringssl = ["pingora-boringssl", "openssl_derived", "dep:quiche", "dep:ring"] +boringssl = ["pingora-boringssl", "openssl_derived", "quic-boringssl"] rustls = ["pingora-rustls", "any_tls", "dep:x509-parser", "ouroboros"] patched_http1 = ["pingora-http/patched_http1"] openssl_derived = ["any_tls"] 
any_tls = [] sentry = ["dep:sentry"] +quic-boringssl = ["dep:quiche", "dep:ring"] diff --git a/pingora-core/src/apps/mod.rs b/pingora-core/src/apps/mod.rs index 1e46562c1..21c099099 100644 --- a/pingora-core/src/apps/mod.rs +++ b/pingora-core/src/apps/mod.rs @@ -245,7 +245,7 @@ where }; return None; } - h3_stream = h3_server::HttpSession::from_h3_conn(&mut h3_conn, digest.clone()) => h3_stream + h3_stream = h3_server::H3Session::from_h3_conn(&mut h3_conn, digest.clone()) => h3_stream }; let h3_stream = match h3_stream { diff --git a/pingora-core/src/listeners/mod.rs b/pingora-core/src/listeners/mod.rs index 26eb71fa8..35ef9223e 100644 --- a/pingora-core/src/listeners/mod.rs +++ b/pingora-core/src/listeners/mod.rs @@ -35,10 +35,9 @@ use std::{fs::Permissions, sync::Arc}; use l4::{ListenerEndpoint, ServerProtocol, Stream as L4Stream}; use tls::{Acceptor, TlsSettings}; -use crate::listeners::l4::UdpSocketOptions; use crate::protocols::l4::quic::QuicHttp3Configs; pub use crate::protocols::tls::ALPN; -pub use l4::{ServerAddress, TcpSocketOptions}; +pub use l4::{ServerAddress, TcpSocketOptions, UdpSocketOptions}; /// The APIs to customize things like certificate during TLS server side handshake #[async_trait] @@ -181,7 +180,7 @@ impl Listeners { )); } - /// Add a QUIC endpoint to `self`, with the given [`UdpSocketOptions`]. + /// Add a QUIC endpoint to `self`, with the given [`UdpSocketOptions`] and [`QuicHttp3Configs`]. 
pub fn add_quic_with_settings( &mut self, addr: &str, diff --git a/pingora-core/src/protocols/http/server.rs b/pingora-core/src/protocols/http/server.rs index db114a3c4..59c353bad 100644 --- a/pingora-core/src/protocols/http/server.rs +++ b/pingora-core/src/protocols/http/server.rs @@ -16,7 +16,7 @@ use super::v1::server::HttpSession as SessionV1; use super::v2::server::HttpSession as SessionV2; -use super::v3::server::HttpSession as SessionV3; +use super::v3::server::H3Session as SessionV3; use super::HttpTask; use super::{error_resp, HttpVersion}; use crate::protocols::{Digest, SocketAddr, Stream}; diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index 01ef7f123..034f7b42f 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -24,7 +24,7 @@ use std::fmt::Debug; pub mod nohash; pub mod server; -pub fn event_to_request_headers(list: &Vec
) -> Result { +fn event_to_request_headers(list: &Vec
) -> Result { let (mut parts, _) = Request::new(()).into_parts(); let mut uri = Uri::builder(); let mut headers = HeaderMap::new(); diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index 80f459023..f1979a8af 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -117,6 +117,7 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

, @@ -213,7 +214,10 @@ impl H3Connection { } /// HTTP/3 server session -pub struct HttpSession { +/// [`H3Session`]s contain the converted [`quiche::h3::Event::Headers`] as +/// [`pingora_http::RequestHeader`]. The [`H3Session`] is built around [`pingora_http`] structs and +/// converts to [`quiche::h3::Event`] where needed. +pub struct H3Session { pub(crate) connection_id: ConnectionId<'static>, pub(crate) stream_id: u64, quic_connection: Arc>, @@ -253,7 +257,7 @@ pub struct HttpSession { digest: Arc, } -impl Drop for HttpSession { +impl Drop for H3Session { fn drop(&mut self) { let mut drop_sessions = self.drop_session.lock(); drop_sessions.push_back(self.stream_id); @@ -264,8 +268,8 @@ impl Drop for HttpSession { } } -impl HttpSession { - /// Create a new [`HttpSession`] from the QUIC connection. +impl H3Session { + /// Create a new [`H3Session`] from the QUIC connection. /// This function returns a new HTTP/3 session when the provided HTTP/3 connection, `conn`, /// establishes a new HTTP/3 stream to this server. /// @@ -346,7 +350,7 @@ impl HttpSession { let (event_tx, event_rx) = mpsc::channel(H3_SESSION_EVENTS_CHANNEL_SIZE); - let session = HttpSession { + let session = H3Session { connection_id: conn.connection_id.clone(), stream_id, @@ -476,17 +480,11 @@ impl HttpSession { } /// The request sent from the client - /// - /// Different from its HTTP/1.X counterpart, this function never panics as the request is already - /// read when established a new HTTP/3 stream. pub fn req_header(&self) -> &RequestHeader { &self.request_header } /// A mutable reference to request sent from the client - /// - /// Different from its HTTP/1.X counterpart, this function never panics as the request is already - /// read when established a new HTTP/3 stream. pub fn req_header_mut(&mut self) -> &mut RequestHeader { &mut self.request_header } @@ -540,8 +538,7 @@ impl HttpSession { // not here. /// Write the response header to the client. 
- /// # the `end` flag - /// `end` marks the end of this session. + /// the `end` flag marks the end of this session. /// If the `end` flag is set, no more header or body can be sent to the client. pub async fn write_response_header( &mut self, @@ -946,7 +943,7 @@ impl HttpSession { /// Give up the stream abruptly. /// - /// This will send a `INTERNAL_ERROR` stream error to the client + /// This will send a `STOP_SENDING` and a `RESET_STREAM` for the Quic stream to the client. pub fn shutdown(&mut self) { if !self.read_ended { self.stream_shutdown(Shutdown::Read, 2u64); @@ -974,7 +971,7 @@ impl HttpSession { buf.freeze() } - /// Whether there is no more body to read + /// Whether there is no more body to read. pub fn is_body_done(&self) -> bool { self.is_body_empty() || self.read_ended } @@ -1011,7 +1008,6 @@ impl HttpSession { }) } - /// `async fn idle() -> Result;` /// This async fn will be pending forever until the client closes the stream/connection /// This function is used for watching client status so that the server is able to cancel /// its internal tasks as the client waiting for the tasks goes away @@ -1022,7 +1018,7 @@ impl HttpSession { } } - /// Similar to `read_body_bytes()` but will be pending after Ok(None) is returned, + /// Similar to `read_body_bytes()` but will be pending after `Ok(None)` is returned, /// until the client closes the connection pub async fn read_body_or_idle(&mut self, no_body_expected: bool) -> Result> { if no_body_expected || self.is_body_done() { diff --git a/pingora-core/src/protocols/l4/listener.rs b/pingora-core/src/protocols/l4/listener.rs index f97208bfd..e3bb92cd4 100644 --- a/pingora-core/src/protocols/l4/listener.rs +++ b/pingora-core/src/protocols/l4/listener.rs @@ -15,6 +15,7 @@ //! 
Listeners use std::io; +use std::os::fd::RawFd; #[cfg(unix)] use std::os::unix::io::AsRawFd; #[cfg(windows)] @@ -121,3 +122,14 @@ impl Listener { } } } + +#[cfg(unix)] +impl AsRawFd for Listener { + fn as_raw_fd(&self) -> RawFd { + match &self { + Self::Quic(l) => l.get_raw_fd(), + Self::Tcp(l) => l.as_raw_fd(), + Self::Unix(l) => l.as_raw_fd(), + } + } +} diff --git a/pingora-core/src/protocols/l4/quic/listener.rs b/pingora-core/src/protocols/l4/quic/listener.rs index e71ddeb88..a32f22b2a 100644 --- a/pingora-core/src/protocols/l4/quic/listener.rs +++ b/pingora-core/src/protocols/l4/quic/listener.rs @@ -1,8 +1,8 @@ use crate::protocols::l4::quic::sendto::{detect_gso, set_txtime_sockopt}; use crate::protocols::l4::quic::{ - Connection, ConnectionHandle, Crypto, HandshakeResponse, IncomingHandle, IncomingState, - Listener, SocketDetails, UdpRecv, CONNECTION_DROP_DEQUE_INITIAL_SIZE, - HANDSHAKE_PACKET_BUFFER_SIZE, MAX_IPV6_BUF_SIZE, MAX_IPV6_QUIC_DATAGRAM_SIZE, + Connection, ConnectionHandle, HandshakeResponse, IncomingHandle, IncomingState, SocketDetails, + UdpRecv, CONNECTION_DROP_DEQUE_INITIAL_SIZE, HANDSHAKE_PACKET_BUFFER_SIZE, MAX_IPV6_BUF_SIZE, + MAX_IPV6_QUIC_DATAGRAM_SIZE, }; use log::{debug, error, trace, warn}; use parking_lot::Mutex; @@ -10,7 +10,8 @@ use pingora_error::{BError, Error, ErrorType}; use quiche::{ConnectionId, Header, RecvInfo, Type}; use ring::hmac::Key; use ring::rand::SystemRandom; -use std::collections::VecDeque; +use std::collections::{HashMap, VecDeque}; +use std::fmt::{Debug, Formatter}; use std::io; use std::io::ErrorKind; use std::net::SocketAddr; @@ -22,55 +23,26 @@ use tokio::sync::mpsc::channel; use crate::protocols::l4::quic::QuicHttp3Configs; use quiche::Connection as QuicheConnection; -impl TryFrom<(UdpSocket, QuicHttp3Configs)> for Listener { - type Error = BError; +/// The [`Listener`] contains a [`HashMap`] linking [`quiche::ConnectionId`] to [`ConnectionHandle`] +/// the `Listener::accept` method returns [`Connection`]s and 
is responsible to forward network +/// UDP packets to the according `Connection` through the corresponding [`ConnectionHandle`]. +/// +/// In the [`ConnectionHandle::Incoming`] state the UDP packets are forwarded through a +/// [`tokio::sync::mpsc::channel`]. +// Once the state is [`ConnectionHandle::Established`] the packets are directly received on +// the [`quiche::Connection`]. +pub struct Listener { + socket_details: SocketDetails, - fn try_from( - (io, configs): (UdpSocket, QuicHttp3Configs), - ) -> pingora_error::Result { - let addr = io.local_addr().map_err(|e| { - Error::explain( - ErrorType::SocketError, - format!("failed to get local address from socket: {}", e), - ) - })?; - let rng = SystemRandom::new(); - let key = Key::generate(ring::hmac::HMAC_SHA256, &rng).map_err(|e| { - Error::explain( - ErrorType::InternalError, - format!("failed to generate listener key: {}", e), - ) - })?; + configs: QuicHttp3Configs, + crypto: Crypto, - let gso_enabled = detect_gso(&io, MAX_IPV6_QUIC_DATAGRAM_SIZE); - let pacing_enabled = match set_txtime_sockopt(&io) { - Ok(_) => { - debug!("successfully set SO_TXTIME socket option"); - true - } - Err(e) => { - debug!("setsockopt failed {:?}", e); - false - } - }; - - Ok(Listener { - socket: Arc::new(io), - socket_details: SocketDetails { - addr, - gso_enabled, - pacing_enabled, - }, - - configs, - crypto: Crypto { key }, + connections: HashMap, ConnectionHandle>, + drop_connections: Arc>>>, +} - connections: Default::default(), - drop_connections: Arc::new(Mutex::new(VecDeque::with_capacity( - CONNECTION_DROP_DEQUE_INITIAL_SIZE, - ))), - }) - } +pub struct Crypto { + key: Key, } impl Listener { @@ -82,12 +54,12 @@ impl Listener { debug!("endpoint rx loop"); 'read: loop { // receive from network and parse Quic header - let (size, from) = match self.socket.try_recv_from(&mut rx_buf) { + let (size, from) = match self.socket_details.io.try_recv_from(&mut rx_buf) { Ok((size, from)) => (size, from), Err(e) => { if e.kind() == 
ErrorKind::WouldBlock { // no more UDP packets to read for now, wait for new packets - self.socket.readable().await?; + self.socket_details.io.readable().await?; continue 'read; } else { return Err(e); @@ -170,7 +142,7 @@ impl Listener { established_handle = Some(e.clone()); needs_establish = Some(e); } - HandshakeResponse::Ignored | HandshakeResponse::Rejected => { + HandshakeResponse::Ignored => { // drop connection self.connections.remove(&header.dcid); continue 'read; @@ -248,7 +220,6 @@ impl Listener { configs: self.configs.clone(), - socket: self.socket.clone(), socket_details: self.socket_details.clone(), udp_rx, response: response.clone(), @@ -260,7 +231,6 @@ impl Listener { }, ignore: false, - reject: false, }); let handle = ConnectionHandle::Incoming(IncomingHandle { udp_tx, response }); @@ -289,7 +259,7 @@ impl Listener { Err(e) => { error!("connection {:?} receive error {:?}", conn_id, e); Err(io::Error::new( - io::ErrorKind::BrokenPipe, + ErrorKind::BrokenPipe, format!( "Connection could not receive network data for {:?}. 
{:?}", conn.destination_id(), @@ -308,18 +278,66 @@ impl Listener { conn_id } - pub(super) fn get_raw_fd(&self) -> RawFd { - self.socket.as_raw_fd() + pub(crate) fn get_raw_fd(&self) -> RawFd { + self.socket_details.io.as_raw_fd() } } -#[cfg(unix)] -impl AsRawFd for crate::protocols::l4::listener::Listener { - fn as_raw_fd(&self) -> RawFd { - match &self { - Self::Quic(l) => l.get_raw_fd(), - Self::Tcp(l) => l.as_raw_fd(), - Self::Unix(l) => l.as_raw_fd(), - } +impl TryFrom<(UdpSocket, QuicHttp3Configs)> for Listener { + type Error = BError; + + fn try_from( + (io, configs): (UdpSocket, QuicHttp3Configs), + ) -> pingora_error::Result { + let addr = io.local_addr().map_err(|e| { + Error::explain( + ErrorType::SocketError, + format!("failed to get local address from socket: {}", e), + ) + })?; + let rng = SystemRandom::new(); + let key = Key::generate(ring::hmac::HMAC_SHA256, &rng).map_err(|e| { + Error::explain( + ErrorType::InternalError, + format!("failed to generate listener key: {}", e), + ) + })?; + + let gso_enabled = detect_gso(&io, MAX_IPV6_QUIC_DATAGRAM_SIZE); + let pacing_enabled = match set_txtime_sockopt(&io) { + Ok(_) => { + debug!("successfully set SO_TXTIME socket option"); + true + } + Err(e) => { + debug!("setsockopt failed {:?}", e); + false + } + }; + + Ok(Listener { + socket_details: SocketDetails { + io: Arc::new(io), + addr, + gso_enabled, + pacing_enabled, + }, + + configs, + crypto: Crypto { key }, + + connections: Default::default(), + drop_connections: Arc::new(Mutex::new(VecDeque::with_capacity( + CONNECTION_DROP_DEQUE_INITIAL_SIZE, + ))), + }) + } +} + +impl Debug for Listener { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Listener") + .field("io", &self.socket_details.io) + .finish() } } diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index b9b5a6fe8..eb23a4c1b 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ 
b/pingora-core/src/protocols/l4/quic/mod.rs @@ -4,8 +4,7 @@ use pingora_error::{Error, ErrorType, OrErr, Result}; use quiche::Connection as QuicheConnection; use quiche::{h3, Config}; use quiche::{ConnectionId, Header, RecvInfo, Stats}; -use ring::hmac::Key; -use std::collections::{HashMap, VecDeque}; +use std::collections::VecDeque; use std::fmt::{Debug, Formatter}; use std::net::SocketAddr; use std::os::fd::{AsRawFd, RawFd}; @@ -20,56 +19,55 @@ use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::Notify; use tokio::task::JoinHandle; -pub(crate) mod id_token; mod listener; mod sendto; -pub(crate) mod tls_handshake; + +pub(crate) mod id_token; +pub(crate) use listener::Listener; use crate::protocols::l4::quic::sendto::send_to; use crate::protocols::ConnectionState; // UDP header 8 bytes, IPv4 Header 20 bytes //pub const MAX_IPV4_BUF_SIZE: usize = 65507; -// UDP header 8 bytes, IPv6 Header 40 bytes +/// UDP header 8 bytes, IPv6 Header 40 bytes pub const MAX_IPV6_BUF_SIZE: usize = 65487; -// 1500(Ethernet) - 20(IPv4 header) - 8(UDP header) = 1472. +// 1500(Ethernet MTU) - 20(IPv4 header) - 8(UDP header) = 1472. //pub const MAX_IPV4_UDP_PACKET_SIZE: usize = 1472; -// 1500(Ethernet) - 40(IPv6 header) - 8(UDP header) = 1452 +/// 1500(Ethernet MTU) - 40(IPv6 header) - 8(UDP header) = 1452 pub const MAX_IPV6_UDP_PACKET_SIZE: usize = 1452; //pub const MAX_IPV4_QUIC_DATAGRAM_SIZE: usize = 1370; +// TODO: validate size (possibly 1200 is the standard) pub const MAX_IPV6_QUIC_DATAGRAM_SIZE: usize = 1350; +/// max. 
amount of [`UdpRecv`] messages on the `tokio::sync::mpsc::channel` const HANDSHAKE_PACKET_BUFFER_SIZE: usize = 64; +/// initial size for the connection drop deque const CONNECTION_DROP_DEQUE_INITIAL_SIZE: usize = 1024; -pub struct Listener { - socket: Arc, - socket_details: SocketDetails, - - configs: QuicHttp3Configs, - crypto: Crypto, - - connections: HashMap, ConnectionHandle>, - drop_connections: Arc>>>, -} - -pub struct Crypto { - key: Key, -} +// TODO: potentially split more into separate modules +// as of now it is not fully clear which parts will be re-used for the [`Connector`] +/// A [`Connection`] corresponds to a [`ConnectionHandle`]. +/// +/// They are created having the variants [`Connection::Incoming`] / [`ConnectionHandle::Incoming`] +/// and are transitioned to the [`Connection::Established`] / [`ConnectionHandle::Established`] +/// variants once the TLS handshake was successful. pub enum Connection { + /// new connection during handshake Incoming(IncomingState), + /// transitioned once the handshake is successful ([`quiche::Connection::is_established`]) Established(EstablishedState), } +/// corresponds to a new connection before the handshake is completed pub struct IncomingState { pub(crate) connection_id: ConnectionId<'static>, pub(crate) configs: QuicHttp3Configs, pub(crate) drop_connection: Arc>>>, - pub(crate) socket: Arc, pub(crate) socket_details: SocketDetails, pub(crate) udp_rx: Receiver, pub(crate) response: Arc>>, @@ -77,29 +75,41 @@ pub struct IncomingState { pub(crate) dgram: UdpRecv, pub(crate) ignore: bool, - pub(crate) reject: bool, } #[derive(Clone)] pub(crate) struct SocketDetails { + pub(crate) io: Arc, addr: SocketAddr, gso_enabled: bool, pacing_enabled: bool, } +/// can be used to wait for network data or trigger network sending pub struct EstablishedState { pub(crate) connection_id: ConnectionId<'static>, pub(crate) connection: Arc>, - pub(crate) drop_connection: Arc>>>, + pub(crate) http3_config: Arc, + + /// is used to wait 
for new data received on the connection + /// (e.g. after [`quiche::h3::Connection.poll()`] returned [`quiche::h3::Error::Done`]) pub(crate) rx_notify: Arc, + /// is used to trigger a transmit loop which sends all connection data until [`quiche::h3::Error::Done`] pub(crate) tx_notify: Arc, - pub(crate) socket: Arc, + + /// handle for the ConnectionTx task pub(crate) tx_handle: JoinHandle>, + pub(crate) drop_connection: Arc>>>, + pub(crate) socket: Arc, } +/// A [`ConnectionHandle`] corresponds to a [`Connection`]. +/// For further details please refer to [`Connection`]. pub enum ConnectionHandle { + /// new connection handle during handshake Incoming(IncomingHandle), + /// transitioned once the handshake is successful ([`quiche::Connection::is_established`]) Established(EstablishedHandle), } @@ -113,6 +123,7 @@ impl Debug for ConnectionHandle { } } +/// used to forward data from the UDP socket during the handshake pub struct IncomingHandle { udp_tx: Sender, response: Arc>>, @@ -121,10 +132,10 @@ pub struct IncomingHandle { pub(crate) enum HandshakeResponse { Established(EstablishedHandle), Ignored, - Rejected, - // TODO: TimedOut, + // TODO: TimedOut } +/// is used to forward data from the UDP socket to the Quic connection #[derive(Clone)] pub struct EstablishedHandle { pub(crate) connection_id: ConnectionId<'static>, @@ -133,6 +144,7 @@ pub struct EstablishedHandle { pub(crate) tx_notify: Arc, } +/// the message format used on the [`tokio::sync::mpsc::channel`] during the handshake phase pub struct UdpRecv { pub(crate) pkt: Vec, pub(crate) header: Header<'static>, @@ -219,8 +231,9 @@ impl Drop for Connection { } } -pub(crate) struct ConnectionTx { - pub(crate) socket: Arc, +/// connections transmit task sends data from the [`quiche::Connection`] to the UDP socket +/// the actor is notified through the `tx_notify` and flushes all connection data to the network +pub struct ConnectionTx { pub(crate) socket_details: SocketDetails, pub(crate) connection: Arc>, @@ 
-230,8 +243,12 @@ pub(crate) struct ConnectionTx { pub(crate) tx_stats: TxStats, } +/// During establishing a [`ConnectionTx`] task is started being responsible to write data from +/// the [`quiche::Connection`] to the `[UdpSocket`]. +/// The connections `Rx` path is part of the [`Listener::accept`] which distributes the datagrams +/// to the according connections. impl ConnectionTx { - pub(crate) async fn start_tx(mut self) -> Result<()> { + pub(crate) async fn start(mut self) -> Result<()> { let id = self.connection_id; let mut out = [0u8; MAX_IPV6_BUF_SIZE]; @@ -306,7 +323,7 @@ impl ConnectionTx { // send to network if let Err(e) = send_to( - &self.socket, + &self.socket_details.io, &out[..total_write], &dst_info, self.tx_stats.max_datagram_size, @@ -344,6 +361,7 @@ impl ConnectionTx { } } +/// used within [`ConnectionTx`] to keep track of the maximum send burst pub struct TxStats { loss_rate: f64, max_send_burst: usize, @@ -374,28 +392,20 @@ impl TxStats { } } -impl AsRawFd for Connection { - fn as_raw_fd(&self) -> RawFd { +impl Connection { + pub(crate) fn local_addr(&self) -> io::Result { match self { - Connection::Incoming(s) => s.socket.as_raw_fd(), - Connection::Established(s) => s.socket.as_raw_fd(), + Connection::Incoming(s) => s.socket_details.io.local_addr(), + Connection::Established(s) => s.socket.local_addr(), } } } -impl Debug for Listener { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Listener") - .field("io", &self.socket) - .finish() - } -} - -impl Connection { - pub(crate) fn local_addr(&self) -> io::Result { +impl AsRawFd for Connection { + fn as_raw_fd(&self) -> RawFd { match self { - Connection::Incoming(s) => s.socket.local_addr(), - Connection::Established(s) => s.socket.local_addr(), + Connection::Incoming(s) => s.socket_details.io.as_raw_fd(), + Connection::Established(s) => s.socket.as_raw_fd(), } } } @@ -426,7 +436,9 @@ impl AsyncWrite for Connection { } } -#[allow(unused_variables)] // TODO: remove 
+// TODO: consider usage for Quic/Connection/Datagrams +// is there be any source for data in this area (e.g. L4/UDP -> Quic/Dgram, Media Over Quic, ...) +#[allow(unused_variables)] impl AsyncRead for Connection { fn poll_read( self: Pin<&mut Self>, @@ -447,6 +459,9 @@ impl ConnectionState for Connection { } } +/// contains configs for Quic [`quiche::Config`] and Http3 [`quiche::h3::Config`] +/// +/// the configs can be supplied during the [`crate::listeners::Listeners`] creation #[derive(Clone)] pub struct QuicHttp3Configs { quic: Arc>, @@ -474,8 +489,8 @@ impl QuicHttp3Configs { // quic.verify_peer(); default server = false; client = true // quic.discover_pmtu(false); // default false quic.grease(false); // default true - // quic.log_keys() && config.set_keylog(); // logging SSL secrets - // quic.set_ticket_key() // session ticket signer key material + // quic.log_keys() && config.set_keylog(); // logging SSL secrets + // quic.set_ticket_key() // session ticket signer key material //config.enable_early_data(); // can lead to ZeroRTT headers during handshake @@ -529,7 +544,7 @@ impl QuicHttp3Configs { }) } - pub fn from_cert_key_path(cert_chain_pem_file: &str, priv_key_pem_file: &str) -> Result { + pub fn from_cert_key_paths(cert_chain_pem_file: &str, priv_key_pem_file: &str) -> Result { Ok(Self { quic: Arc::new(Mutex::new(Self::new_quic( cert_chain_pem_file, diff --git a/pingora-core/src/protocols/l4/quic/tls_handshake.rs b/pingora-core/src/protocols/l4/quic/tls_handshake.rs deleted file mode 100644 index 8b1378917..000000000 --- a/pingora-core/src/protocols/l4/quic/tls_handshake.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/pingora-core/src/protocols/tls/quic/mod.rs b/pingora-core/src/protocols/tls/quic/mod.rs index 34b8c00b2..5c43bd153 100644 --- a/pingora-core/src/protocols/tls/quic/mod.rs +++ b/pingora-core/src/protocols/tls/quic/mod.rs @@ -1,7 +1,7 @@ use crate::protocols::l4::quic::id_token::{mint_token, validate_token}; use crate::protocols::l4::quic::{ 
Connection, ConnectionTx, EstablishedHandle, EstablishedState, HandshakeResponse, - IncomingState, TxStats, MAX_IPV6_UDP_PACKET_SIZE, + IncomingState, TxStats, MAX_IPV6_QUIC_DATAGRAM_SIZE, }; use crate::protocols::l4::stream::Stream as L4Stream; use crate::protocols::ConnectionState; @@ -65,7 +65,6 @@ async fn handshake_inner( configs, drop_connection, - socket, socket_details, udp_rx, dgram, @@ -73,7 +72,6 @@ async fn handshake_inner( response, ignore, - reject, } = state; if *ignore { @@ -82,20 +80,11 @@ async fn handshake_inner( *resp = Some(HandshakeResponse::Ignored) } return Ok(None); - } else if *reject { - { - let mut resp = response.lock(); - *resp = Some(HandshakeResponse::Rejected) - } - return Ok(None); - // TODO: send to peer, return err if send fails } + let socket = &socket_details.io; let initial_dcid = dgram.header.dcid.clone(); - - // TODO: use correct buf sizes for IPv4 & IPv6 - // for now use IPv6 values as they are smaller, should work as well on IPv4 - let mut out = [0u8; MAX_IPV6_UDP_PACKET_SIZE]; + let mut out = [0u8; MAX_IPV6_QUIC_DATAGRAM_SIZE]; if !quiche::version_is_supported(dgram.header.version) { warn!("Quic packet version received is not supported. 
Negotiating version..."); @@ -316,7 +305,6 @@ async fn handshake_inner( } let tx = ConnectionTx { - socket: socket.clone(), socket_details: socket_details.clone(), connection_id: connection_id.clone(), connection: connection.clone(), @@ -326,17 +314,17 @@ async fn handshake_inner( }; let state = EstablishedState { - socket: socket.clone(), - tx_handle: tokio::spawn(tx.start_tx()), - connection_id: connection_id.clone(), connection: connection.clone(), - drop_connection: drop_connection.clone(), http3_config: configs.http3().clone(), rx_notify: rx_notify.clone(), tx_notify: tx_notify.clone(), + + tx_handle: tokio::spawn(tx.start()), + drop_connection: drop_connection.clone(), + socket: socket.clone(), }; Ok(Some(state)) diff --git a/pingora-core/tests/test_basic.rs b/pingora-core/tests/test_basic.rs index 19e39d863..aef80ffe8 100644 --- a/pingora-core/tests/test_basic.rs +++ b/pingora-core/tests/test_basic.rs @@ -165,7 +165,7 @@ async fn test_quic_http3_timeout() -> Result<()> { let config = Config::new() .with_connect_to("127.0.0.1:6147".to_string()) .with_host_port("openrusty.org:6147".to_string()) - .with_idle_timeout(60000) + .with_idle_timeout(3000) .verify_peer(false) .build() .unwrap(); diff --git a/pingora-core/tests/utils/mod.rs b/pingora-core/tests/utils/mod.rs index ba0979ba1..4361474e2 100644 --- a/pingora-core/tests/utils/mod.rs +++ b/pingora-core/tests/utils/mod.rs @@ -95,7 +95,7 @@ fn entry_point(opt: Option) { tls_settings.enable_h2(); listeners.add_tls_with_settings("0.0.0.0:6146", None, tls_settings); - let configs = QuicHttp3Configs::from_cert_key_path(&cert_path, &key_path).unwrap(); + let configs = QuicHttp3Configs::from_cert_key_paths(&cert_path, &key_path).unwrap(); listeners.add_quic("0.0.0.0:6147", configs); let mut echo_service_http = From ee077b86787edf9cae1f3207dc8eb0c440cec0c9 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Thu, 16 Jan 2025 14:56:26 +0100 Subject: [PATCH 25/52] bump MSRV in workflow to 1.74 --- 
.github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 055c6fc2f..7301f44d8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,7 +7,7 @@ jobs: strategy: matrix: # nightly, MSRV, and latest stable - toolchain: [nightly, 1.72, 1.82.0] + toolchain: [nightly, 1.74, 1.82.0] runs-on: ubuntu-latest # Only run on "pull_request" event for external PRs. This is to avoid # duplicate builds for PRs created from internal branches. From 3d368f6cea5b6660642196bf2e5317eda0597879 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Thu, 16 Jan 2025 18:07:05 +0100 Subject: [PATCH 26/52] fix tx loop continue write reset --- pingora-core/src/protocols/l4/quic/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index eb23a4c1b..3d3892f5b 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -253,9 +253,10 @@ impl ConnectionTx { let mut out = [0u8; MAX_IPV6_BUF_SIZE]; let mut finished_sending = false; - let mut continue_write = false; debug!("connection {:?} tx write", id); 'write: loop { + let mut continue_write = false; + // update stats from connection let max_send_burst = { let conn = self.connection.lock(); From 0429df8670abe1dc16847f8106dbb470728d2e8d Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Fri, 17 Jan 2025 12:22:56 +0100 Subject: [PATCH 27/52] impl Ssl for Quic connection, extend ALPN --- pingora-core/Cargo.toml | 4 +- .../listeners/tls/boringssl_openssl/mod.rs | 1 + pingora-core/src/protocols/l4/quic/mod.rs | 39 +++++++++++++++++-- pingora-core/src/protocols/mod.rs | 2 +- pingora-core/src/protocols/tls/mod.rs | 14 ++++++- 5 files changed, 51 insertions(+), 9 deletions(-) diff --git a/pingora-core/Cargo.toml b/pingora-core/Cargo.toml index a134e421c..082633106 100644 --- 
a/pingora-core/Cargo.toml +++ b/pingora-core/Cargo.toml @@ -67,7 +67,7 @@ zstd = "0" httpdate = "1" x509-parser = { version = "0.16.0", optional = true } ouroboros = { version = "0.18.4", optional = true } -quiche = { git = 'https://github.com/cloudflare/quiche.git', rev = "5d2031ca", optional = true } +quiche = { git = 'https://github.com/cloudflare/quiche.git', rev = "5d2031ca", default-features = false, optional = true } ring = { version = "0.17.8", optional = true } [target.'cfg(unix)'.dependencies] @@ -98,4 +98,4 @@ patched_http1 = ["pingora-http/patched_http1"] openssl_derived = ["any_tls"] any_tls = [] sentry = ["dep:sentry"] -quic-boringssl = ["dep:quiche", "dep:ring"] +quic-boringssl = ["dep:quiche", "dep:ring", "quiche/boringssl-boring-crate"] diff --git a/pingora-core/src/listeners/tls/boringssl_openssl/mod.rs b/pingora-core/src/listeners/tls/boringssl_openssl/mod.rs index 500f9598a..1153a620f 100644 --- a/pingora-core/src/listeners/tls/boringssl_openssl/mod.rs +++ b/pingora-core/src/listeners/tls/boringssl_openssl/mod.rs @@ -113,6 +113,7 @@ impl TlsSettings { .set_alpn_select_callback(alpn::prefer_h2), ALPN::H1 => self.accept_builder.set_alpn_select_callback(alpn::h1_only), ALPN::H2 => self.accept_builder.set_alpn_select_callback(alpn::h2_only), + ALPN::H3 => { /* noop */ } } } diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index 3d3892f5b..87f6a2509 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -23,10 +23,11 @@ mod listener; mod sendto; pub(crate) mod id_token; -pub(crate) use listener::Listener; - +use crate::listeners::ALPN; use crate::protocols::l4::quic::sendto::send_to; -use crate::protocols::ConnectionState; +use crate::protocols::tls::{SslDigest, TlsRef}; +use crate::protocols::{ConnectionState, Ssl}; +pub(crate) use listener::Listener; // UDP header 8 bytes, IPv4 Header 20 bytes //pub const MAX_IPV4_BUF_SIZE: usize = 65507; @@ -402,6 
+403,36 @@ impl Connection { } } +impl Ssl for Connection { + /// Return the TLS info if the connection is over TLS + fn get_ssl(&self) -> Option<&TlsRef> { + None + } + + /// Return the [`tls::SslDigest`] for logging + fn get_ssl_digest(&self) -> Option> { + match self { + Connection::Incoming(_) => None, + Connection::Established(s) => { + let mut conn = s.connection.lock(); + let conn = &mut *conn; + Some(Arc::from(SslDigest::from_ssl(conn.as_mut()))) + } + } + } + + /// Return selected ALPN if any + fn selected_alpn_proto(&self) -> Option { + match self { + Connection::Incoming(_) => None, + Connection::Established(s) => { + let conn = s.connection.lock(); + ALPN::from_wire_selected(conn.application_proto()) + } + } + } +} + impl AsRawFd for Connection { fn as_raw_fd(&self) -> RawFd { match self { @@ -503,7 +534,7 @@ impl QuicHttp3Configs { // quic.set_application_protos_wire_format(); // quic.set_max_amplification_factor(3); // anti-amplification limit factor; default 3 - quic.set_max_idle_timeout(60 * 1000); // default ulimited + quic.set_max_idle_timeout(600 * 1000); // default ulimited quic.set_max_recv_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // recv default is 65527 quic.set_max_send_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // send default is 1200 quic.set_initial_max_data(10_000_000); // 10 Mb diff --git a/pingora-core/src/protocols/mod.rs b/pingora-core/src/protocols/mod.rs index 03b201299..60a903dd6 100644 --- a/pingora-core/src/protocols/mod.rs +++ b/pingora-core/src/protocols/mod.rs @@ -52,7 +52,7 @@ pub trait UniqueID { fn id(&self) -> UniqueIDType; } -/// Interface to get the raw connection for e.g. non-connection based network protocols like UDP/QUIC +/// Interface to get the connection state for e.g. 
for UDP/QUIC pub trait ConnectionState { fn quic_connection_state(&mut self) -> Option<&mut Connection> { None diff --git a/pingora-core/src/protocols/tls/mod.rs b/pingora-core/src/protocols/tls/mod.rs index acc83bd4a..dbb81a8a8 100644 --- a/pingora-core/src/protocols/tls/mod.rs +++ b/pingora-core/src/protocols/tls/mod.rs @@ -45,6 +45,8 @@ pub enum ALPN { H2, /// Prefer HTTP/2 over HTTP/1.1 H2H1, + /// Prefer HTTP/3 only + H3, } impl std::fmt::Display for ALPN { @@ -53,6 +55,7 @@ impl std::fmt::Display for ALPN { ALPN::H1 => write!(f, "H1"), ALPN::H2 => write!(f, "H2"), ALPN::H2H1 => write!(f, "H2H1"), + ALPN::H3 => write!(f, "H3"), } } } @@ -64,6 +67,8 @@ impl ALPN { ALPN::H1 } else if min == 2 { ALPN::H2 + } else if min == 3 { + ALPN::H3 } else { ALPN::H2H1 } @@ -73,15 +78,17 @@ impl ALPN { pub fn get_max_http_version(&self) -> u8 { match self { ALPN::H1 => 1, - _ => 2, + ALPN::H2 | ALPN::H2H1 => 2, + ALPN::H3 => 3, } } /// Return the min http version this [`ALPN`] allows pub fn get_min_http_version(&self) -> u8 { match self { + ALPN::H3 => 3, ALPN::H2 => 2, - _ => 1, + ALPN::H2H1 | ALPN::H1 => 1, } } @@ -93,6 +100,7 @@ impl ALPN { Self::H1 => b"\x08http/1.1", Self::H2 => b"\x02h2", Self::H2H1 => b"\x02h2\x08http/1.1", + Self::H3 => b"\x02h3", } } @@ -101,6 +109,7 @@ impl ALPN { match raw { b"http/1.1" => Some(Self::H1), b"h2" => Some(Self::H2), + b"h3" => Some(Self::H3), _ => None, } } @@ -111,6 +120,7 @@ impl ALPN { ALPN::H1 => vec![b"http/1.1".to_vec()], ALPN::H2 => vec![b"h2".to_vec()], ALPN::H2H1 => vec![b"h2".to_vec(), b"http/1.1".to_vec()], + ALPN::H3 => vec![b"h3".to_vec()], } } } From 4ddd927722a9d6f9d9039023beda1eb3a3fd0464 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Mon, 20 Jan 2025 10:57:31 +0100 Subject: [PATCH 28/52] connector UDP socket creation, layout connector implementation --- pingora-core/src/connectors/http/mod.rs | 30 ++-- pingora-core/src/connectors/http/v2.rs | 21 +-- pingora-core/src/connectors/http/v3.rs | 93 +++++++++++ 
pingora-core/src/connectors/l4.rs | 68 +++++++- pingora-core/src/connectors/mod.rs | 6 +- pingora-core/src/listeners/l4.rs | 30 +--- pingora-core/src/protocols/http/client.rs | 36 +++- pingora-core/src/protocols/http/v3/client.rs | 138 +++++++++++++++ pingora-core/src/protocols/http/v3/mod.rs | 1 + pingora-core/src/protocols/http/v3/server.rs | 12 +- pingora-core/src/protocols/l4/ext.rs | 158 +++++++++++++++++- .../src/protocols/l4/quic/connector.rs | 23 +++ .../src/protocols/l4/quic/listener.rs | 42 +---- pingora-core/src/protocols/l4/quic/mod.rs | 81 ++++++++- pingora-core/src/protocols/tls/quic/mod.rs | 35 +++- pingora-core/src/upstreams/peer.rs | 9 + 16 files changed, 668 insertions(+), 115 deletions(-) create mode 100644 pingora-core/src/connectors/http/v3.rs create mode 100644 pingora-core/src/protocols/http/v3/client.rs create mode 100644 pingora-core/src/protocols/l4/quic/connector.rs diff --git a/pingora-core/src/connectors/http/mod.rs b/pingora-core/src/connectors/http/mod.rs index 01339909a..7edfddb31 100644 --- a/pingora-core/src/connectors/http/mod.rs +++ b/pingora-core/src/connectors/http/mod.rs @@ -22,17 +22,20 @@ use std::time::Duration; pub mod v1; pub mod v2; +pub mod v3; pub struct Connector { h1: v1::Connector, h2: v2::Connector, + h3: v3::Connector, } impl Connector { pub fn new(options: Option) -> Self { Connector { h1: v1::Connector::new(options.clone()), - h2: v2::Connector::new(options), + h2: v2::Connector::new(options.clone()), + h3: v3::Connector::new(options), } } @@ -72,8 +75,14 @@ impl Connector { } } let session = self.h2.new_http_session(peer).await?; - Ok((session, false)) + return Ok((session, false)); } + /* + // FIXME: correctly route HTTP3 + let Some(h3) = self.h3.reused_http_session(peer).await?; { + Ok((HttpSession::H3(h3), true)) + } + */ } pub async fn release_http_session( @@ -85,6 +94,7 @@ impl Connector { match session { HttpSession::H1(h1) => self.h1.release_http_session(h1, peer, idle_timeout).await, 
HttpSession::H2(h2) => self.h2.release_http_session(h2, peer, idle_timeout), + HttpSession::H3(h3) => self.h3.release_http_session(h3, peer, idle_timeout), } } @@ -121,8 +131,8 @@ mod tests { let (h2, reused) = connector.get_http_session(&peer).await.unwrap(); assert!(!reused); match &h2 { - HttpSession::H1(_) => panic!("expect h2"), HttpSession::H2(h2_stream) => assert!(!h2_stream.ping_timedout()), + _ => panic!("expect h2"), } connector.release_http_session(h2, &peer, None).await; @@ -131,8 +141,8 @@ mod tests { // reused this time assert!(reused); match &h2 { - HttpSession::H1(_) => panic!("expect h2"), HttpSession::H2(h2_stream) => assert!(!h2_stream.ping_timedout()), + _ => panic!("expect h2"), } } @@ -147,7 +157,7 @@ mod tests { HttpSession::H1(http) => { get_http(http, 200).await; } - HttpSession::H2(_) => panic!("expect h1"), + _ => panic!("expect h1"), } connector.release_http_session(h1, &peer, None).await; @@ -156,7 +166,7 @@ mod tests { assert!(reused); match &mut h1 { HttpSession::H1(_) => {} - HttpSession::H2(_) => panic!("expect h1"), + _ => panic!("expect h1"), } } @@ -177,7 +187,7 @@ mod tests { HttpSession::H1(http) => { get_http(http, 200).await; } - HttpSession::H2(_) => panic!("expect h1"), + _ => panic!("expect h1"), } connector.release_http_session(h1, &peer, None).await; @@ -189,7 +199,7 @@ mod tests { assert!(reused); match &mut h1 { HttpSession::H1(_) => {} - HttpSession::H2(_) => panic!("expect h1"), + _ => panic!("expect h1"), } } @@ -206,7 +216,7 @@ mod tests { HttpSession::H1(http) => { get_http(http, 200).await; } - HttpSession::H2(_) => panic!("expect h1"), + _ => panic!("expect h1"), } connector.release_http_session(h1, &peer, None).await; @@ -216,7 +226,7 @@ mod tests { assert!(reused); match &mut h1 { HttpSession::H1(_) => {} - HttpSession::H2(_) => panic!("expect h1"), + _ => panic!("expect h1"), } } } diff --git a/pingora-core/src/connectors/http/v2.rs b/pingora-core/src/connectors/http/v2.rs index ad41f6b57..d9b9dafbe 100644 
--- a/pingora-core/src/connectors/http/v2.rs +++ b/pingora-core/src/connectors/http/v2.rs @@ -162,19 +162,20 @@ impl ConnectionRef { } } -struct InUsePool { +// FIXME: potentially lift to mod.rs +pub(crate) struct InUsePool { // TODO: use pingora hashmap to shard the lock contention pools: RwLock>>, } impl InUsePool { - fn new() -> Self { + pub(crate) fn new() -> Self { InUsePool { pools: RwLock::new(HashMap::new()), } } - fn insert(&self, reuse_hash: u64, conn: ConnectionRef) { + pub(crate) fn insert(&self, reuse_hash: u64, conn: ConnectionRef) { { let pools = self.pools.read(); if let Some(pool) = pools.get(&reuse_hash) { @@ -192,14 +193,14 @@ impl InUsePool { // retrieve a h2 conn ref to create a new stream // the caller should return the conn ref to this pool if there are still // capacity left for more streams - fn get(&self, reuse_hash: u64) -> Option { + pub(crate) fn get(&self, reuse_hash: u64) -> Option { let pools = self.pools.read(); pools.get(&reuse_hash)?.get_any().map(|v| v.1) } // release a h2_stream, this functional will cause an ConnectionRef to be returned (if exist) // the caller should update the ref and then decide where to put it (in use pool or idle) - fn release(&self, reuse_hash: u64, id: UniqueIDType) -> Option { + pub(crate) fn release(&self, reuse_hash: u64, id: UniqueIDType) -> Option { let pools = self.pools.read(); if let Some(pool) = pools.get(&reuse_hash) { pool.remove(id) @@ -470,7 +471,7 @@ mod tests { peer.options.set_http_version(2, 2); let h2 = connector.new_http_session(&peer).await.unwrap(); match h2 { - HttpSession::H1(_) => panic!("expect h2"), + HttpSession::H1(_) | HttpSession::H3(_) => panic!("expect h2"), HttpSession::H2(h2_stream) => assert!(!h2_stream.ping_timedout()), } } @@ -485,7 +486,7 @@ mod tests { let h2 = connector.new_http_session(&peer).await.unwrap(); match h2 { HttpSession::H1(_) => {} - HttpSession::H2(_) => panic!("expect h1"), + _ => panic!("expect h1"), } } @@ -497,7 +498,7 @@ mod tests { let h2 = 
connector.new_http_session(&peer).await.unwrap(); match h2 { HttpSession::H1(_) => {} - HttpSession::H2(_) => panic!("expect h1"), + _ => panic!("expect h1"), } } @@ -510,8 +511,8 @@ mod tests { peer.options.max_h2_streams = 1; let h2 = connector.new_http_session(&peer).await.unwrap(); let h2_1 = match h2 { - HttpSession::H1(_) => panic!("expect h2"), HttpSession::H2(h2_stream) => h2_stream, + _ => panic!("expect h2"), }; let id = h2_1.conn.id(); @@ -542,8 +543,8 @@ mod tests { peer.options.max_h2_streams = 3; let h2 = connector.new_http_session(&peer).await.unwrap(); let h2_1 = match h2 { - HttpSession::H1(_) => panic!("expect h2"), HttpSession::H2(h2_stream) => h2_stream, + _ => panic!("expect h2"), }; let id = h2_1.conn.id(); diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs new file mode 100644 index 000000000..016190be6 --- /dev/null +++ b/pingora-core/src/connectors/http/v3.rs @@ -0,0 +1,93 @@ +// FIXME: implement request spawning +// ConnectorOptions contains CA file path from ServerConfig + +use crate::connectors::http::v2::{ConnectionRef, InUsePool}; +use crate::connectors::{ConnectorOptions, TransportConnector}; +use crate::protocols::http::v2::client::Http2Session; +use crate::protocols::http::v3::client::Http3Session; +use crate::upstreams::peer::Peer; +use pingora_pool::ConnectionPool; +use std::sync::Arc; +use std::time::Duration; + +/// Http3 connector +pub struct Connector { + // just for creating connections, the Stream of h2 should be reused + transport: TransportConnector, + // the h2 connection idle pool + //idle_pool: Arc>, + // the pool of h2 connections that have ongoing streams + //in_use_pool: crate::connectors::http::v2::InUsePool, + in_use_pool: InUsePool, + // the h3 connection idle pool + idle_pool: Arc>, +} + +const DEFAULT_POOL_SIZE: usize = 128; + +impl Connector { + pub fn new(options: Option) -> Self { + let pool_size = options + .as_ref() + .map_or(DEFAULT_POOL_SIZE, |o| 
o.keepalive_pool_size); + // connection offload is handled by the [TransportConnector] + Self { + transport: TransportConnector::new(options), + idle_pool: Arc::new(ConnectionPool::new(pool_size)), + in_use_pool: InUsePool::new(), + } + } + + /// Try to create a new http3 stream from any existing H3 connection. + /// + /// None means there is no "free" connection left. + pub async fn reused_http_session( + &self, + peer: &P, + ) -> pingora_error::Result> { + // check in use pool first so that we use fewer total connections + // then idle pool + let reuse_hash = peer.reuse_hash(); + + // NOTE: We grab a conn from the pools, create a new stream and put the conn back if the + // conn has more free streams. During this process another caller could arrive but is not + // able to find the conn even the conn has free stream to use. + // We accept this false negative to keep the implementation simple. This false negative + // makes an actual impact when there are only a few connection. + // Alternative design 1. given each free stream a conn object: a lot of Arc<> + // Alternative design 2. mutex the pool, which creates lock contention when concurrency is high + // Alternative design 3. do not pop conn from the pool so that multiple callers can grab it + // which will cause issue where spawn_stream() could return None because others call it + // first. Thus a caller might have to retry or give up. This issue is more likely to happen + // when concurrency is high. + let maybe_conn = self + .in_use_pool + .get(reuse_hash) + .or_else(|| self.idle_pool.get(&reuse_hash)); + if let Some(conn) = maybe_conn { + // FIXME: fix types, ConnectionRef = H2 only + let h2_stream = conn.spawn_stream().await?; + if conn.more_streams_allowed() { + self.in_use_pool.insert(reuse_hash, conn); + } + Ok(h2_stream) + } else { + Ok(None) + } + } + + /// Release a finished h3 stream. + /// + /// This function will terminate the [Http3Session]. 
The corresponding h3 connection will now + /// have one more free stream to use. + /// + /// The h2 connection will be closed after `idle_timeout` if it has no active streams. + pub fn release_http_session( + &self, + session: Http3Session, + peer: &P, + idle_timeout: Option, + ) { + todo!() + } +} diff --git a/pingora-core/src/connectors/l4.rs b/pingora-core/src/connectors/l4.rs index e4f106f99..2635151b0 100644 --- a/pingora-core/src/connectors/l4.rs +++ b/pingora-core/src/connectors/l4.rs @@ -25,12 +25,14 @@ use std::os::windows::io::AsRawSocket; #[cfg(unix)] use crate::protocols::l4::ext::connect_uds; use crate::protocols::l4::ext::{ - connect_with as tcp_connect, set_dscp, set_recv_buf, set_tcp_fastopen_connect, + connect_udp_with as udp_connect, connect_with as tcp_connect, set_dscp, set_recv_buf, + set_tcp_fastopen_connect, }; +use crate::protocols::l4::quic::Connection; use crate::protocols::l4::socket::SocketAddr; use crate::protocols::l4::stream::Stream; use crate::protocols::{GetSocketDigest, SocketDigest}; -use crate::upstreams::peer::Peer; +use crate::upstreams::peer::{IpProto, Peer}; /// The interface to establish a L4 connection #[async_trait] @@ -99,10 +101,63 @@ where .await .err_context(|| format!("Fail to establish CONNECT proxy: {}", peer)); } + let peer_ip_proto = peer.ip_proto(); + let peer_addr = peer.address(); - let mut stream: Stream = - if let Some(custom_l4) = peer.get_peer_options().and_then(|o| o.custom_l4.as_ref()) { - custom_l4.connect(peer_addr).await? + + // FIXME: should return an Connection::Outgoing, pre-handshake + // needs to be Udp socket enabled, currently code only handles TCP + let mut stream: Stream = if let Some(custom_l4) = + peer.get_peer_options().and_then(|o| o.custom_l4.as_ref()) + { + custom_l4.connect(peer_addr).await? 
+ } else { + if matches!(peer_ip_proto, IpProto::UDP) { + match peer_addr { + SocketAddr::Inet(addr) => { + let connect_future = udp_connect(addr, bind_to.as_ref(), |socket| { + #[cfg(unix)] + let raw = socket.as_raw_fd(); + #[cfg(windows)] + let raw = socket.as_raw_socket(); + + if let Some(dscp) = peer.dscp() { + debug!("Setting dscp"); + set_dscp(raw, dscp)?; + } + Ok(()) + }); + let conn_res = match peer.connection_timeout() { + Some(t) => pingora_timeout::timeout(t, connect_future) + .await + .explain_err(ConnectTimedout, |_| { + format!("timeout {t:?} connecting to server {peer}") + })?, + None => connect_future.await, + }; + let socket = match conn_res { + Ok(socket) => { + debug!("connected to new server: {}", peer.address()); + Ok(socket.into()) + } + Err(e) => { + let c = format!("Fail to connect to {peer}"); + match e.etype() { + SocketError | BindError => Error::e_because(InternalError, c, e), + _ => Err(e.more_context(c)), + } + } + }?; + + Connection::initiate_outgoing(socket)?.into() + } + SocketAddr::Unix(_addr) => { + // TODO: tokio::net::UnixDatagram support could be an option + // send_to(), recv_from() are using a file path with UnixDatagram + // need to verify if Quic/quiche can handle paths as SocketAddr + todo!() + } + } } else { match peer_addr { SocketAddr::Inet(addr) => { @@ -176,7 +231,8 @@ where } } }? - }; + } + }; let tracer = peer.get_tracer(); if let Some(t) = tracer { diff --git a/pingora-core/src/connectors/mod.rs b/pingora-core/src/connectors/mod.rs index 5a126cc70..e47b4795c 100644 --- a/pingora-core/src/connectors/mod.rs +++ b/pingora-core/src/connectors/mod.rs @@ -184,6 +184,7 @@ impl TransportConnector { do_connect(peer, bind_to, alpn_override, &self.tls_ctx.ctx).await? 
}; + // FIXME: here stream should be Connection::Established Ok(stream) } @@ -327,7 +328,9 @@ async fn do_connect_inner( if peer.tls() { let tls_stream = tls::connect(stream, peer, alpn_override, tls_ctx).await?; Ok(Box::new(tls_stream)) - } else { + } + // FIXME:: call quic::handshake, return Connection::Established + else { Ok(Box::new(stream)) } } @@ -357,6 +360,7 @@ impl PreferredHttpVersion { let v = self.versions.read(); v.get(&key) .copied() + // FIXME: H3 support .map(|v| if v == 1 { ALPN::H1 } else { ALPN::H2H1 }) } } diff --git a/pingora-core/src/listeners/l4.rs b/pingora-core/src/listeners/l4.rs index 57c99ad5b..068a03838 100644 --- a/pingora-core/src/listeners/l4.rs +++ b/pingora-core/src/listeners/l4.rs @@ -12,6 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +use crate::protocols::l4::ext::{create_udp_socket, set_dscp, set_tcp_fastopen_backlog}; +use crate::protocols::l4::listener::Listener; +use crate::protocols::l4::quic::{Listener as QuicListener, QuicHttp3Configs}; +pub use crate::protocols::l4::stream::Stream; +use crate::protocols::TcpKeepalive; +#[cfg(unix)] +use crate::server::ListenFds; use log::warn; use pingora_error::{ ErrorType::{AcceptError, BindError}, @@ -30,14 +37,6 @@ use std::os::windows::io::{AsRawSocket, FromRawSocket}; use std::time::Duration; use tokio::net::{TcpSocket, UdpSocket}; -use crate::protocols::l4::ext::{set_dscp, set_tcp_fastopen_backlog}; -use crate::protocols::l4::listener::Listener; -use crate::protocols::l4::quic::{Listener as QuicListener, QuicHttp3Configs}; -pub use crate::protocols::l4::stream::Stream; -use crate::protocols::TcpKeepalive; -#[cfg(unix)] -use crate::server::ListenFds; - const LISTENER_MAX_TRY: usize = 30; const LISTENER_TRY_STEP: Duration = Duration::from_secs(1); // TODO: configurable backlog @@ -319,20 +318,7 @@ async fn bind_udp_socket(addr: &str, opt: Option) -> Result socket2::Socket::new( - socket2::Domain::IPV4, - 
ty.nonblocking(), - Some(socket2::Protocol::UDP), - ), - SocketAddr::V6(_) => socket2::Socket::new( - socket2::Domain::IPV6, - ty.nonblocking(), - Some(socket2::Protocol::UDP), - ), - } - .or_err_with(BindError, || format!("fail to create address {sock_addr}"))?; + let listener_socket = create_udp_socket(&sock_addr)?; // NOTE: this is to preserve the current UdpListener::bind() behavior. // We have a few tests relying on this behavior to allow multiple identical diff --git a/pingora-core/src/protocols/http/client.rs b/pingora-core/src/protocols/http/client.rs index 6b1e00a87..e3c28765f 100644 --- a/pingora-core/src/protocols/http/client.rs +++ b/pingora-core/src/protocols/http/client.rs @@ -19,28 +19,38 @@ use std::time::Duration; use super::v1::client::HttpSession as Http1Session; use super::v2::client::Http2Session; +use super::v3::client::Http3Session; use crate::protocols::{Digest, SocketAddr, Stream}; /// A type for Http client session. It can be either an Http1 connection or an Http2 stream. pub enum HttpSession { H1(Http1Session), H2(Http2Session), + H3(Http3Session), } impl HttpSession { pub fn as_http1(&self) -> Option<&Http1Session> { match self { Self::H1(s) => Some(s), - Self::H2(_) => None, + _ => None, } } pub fn as_http2(&self) -> Option<&Http2Session> { match self { - Self::H1(_) => None, Self::H2(s) => Some(s), + _ => None, + } + } + + pub fn as_http3(&self) -> Option<&Http3Session> { + match self { + Self::H3(s) => Some(s), + _ => None, } } + /// Write the request header to the server /// After the request header is sent. The caller can either start reading the response or /// sending request body if any. 
@@ -51,6 +61,7 @@ impl HttpSession { Ok(()) } HttpSession::H2(h2) => h2.write_request_header(req, false), + HttpSession::H3(h3) => h3.write_request_header(req, false), } } @@ -63,6 +74,7 @@ impl HttpSession { Ok(()) } HttpSession::H2(h2) => h2.write_request_body(data, end), + HttpSession::H3(h3) => h3.write_request_body(data, end), } } @@ -74,6 +86,7 @@ impl HttpSession { Ok(()) } HttpSession::H2(h2) => h2.finish_request_body(), + HttpSession::H3(h3) => h3.finish_request_body(), } } @@ -84,6 +97,7 @@ impl HttpSession { match self { HttpSession::H1(h1) => h1.read_timeout = Some(timeout), HttpSession::H2(h2) => h2.read_timeout = Some(timeout), + HttpSession::H3(h3) => h3.read_timeout = Some(timeout), } } @@ -91,11 +105,12 @@ impl HttpSession { /// /// The timeout is per write operation, not on the overall time writing the entire request. /// - /// This is a noop for h2. + /// This is a noop for h2 & h3. pub fn set_write_timeout(&mut self, timeout: Duration) { match self { HttpSession::H1(h1) => h1.write_timeout = Some(timeout), HttpSession::H2(_) => { /* no write timeout because the actual write happens async*/ } + HttpSession::H3(_) => { /* no write timeout as timeout is a connection property */ } } } @@ -109,6 +124,7 @@ impl HttpSession { Ok(()) } HttpSession::H2(h2) => h2.read_response_header().await, + HttpSession::H3(h3) => h3.read_response_header().await, } } @@ -119,6 +135,7 @@ impl HttpSession { match self { HttpSession::H1(h1) => h1.read_body_bytes().await, HttpSession::H2(h2) => h2.read_response_body().await, + HttpSession::H3(h3) => h3.read_response_body().await, } } @@ -127,16 +144,20 @@ impl HttpSession { match self { HttpSession::H1(h1) => h1.is_body_done(), HttpSession::H2(h2) => h2.response_finished(), + HttpSession::H3(h3) => h3.response_finished(), } } /// Give up the http session abruptly. 
/// For H1 this will close the underlying connection /// For H2 this will send RST_STREAM frame to end this stream if the stream has not ended at all + /// TODO: fix h3 documentation + /// For H3 this will pub async fn shutdown(&mut self) { match self { Self::H1(s) => s.shutdown().await, Self::H2(s) => s.shutdown(), + Self::H3(s) => s.shutdown(), } } @@ -147,6 +168,7 @@ impl HttpSession { match self { Self::H1(s) => s.resp_header(), Self::H2(s) => s.response_header(), + Self::H3(s) => s.response_header(), } } @@ -158,16 +180,19 @@ impl HttpSession { match self { Self::H1(s) => Some(s.digest()), Self::H2(s) => s.digest(), + Self::H3(s) => s.digest(), } } /// Return a mutable [Digest] reference for the connection. /// /// Will return `None` if this is an H2 session and multiple streams are open. + /// TODO: fix h3 documentation pub fn digest_mut(&mut self) -> Option<&mut Digest> { match self { Self::H1(s) => Some(s.digest_mut()), Self::H2(s) => s.digest_mut(), + Self::H3(s) => s.digest_mut(), } } @@ -176,6 +201,7 @@ impl HttpSession { match self { Self::H1(s) => s.server_addr(), Self::H2(s) => s.server_addr(), + Self::H3(s) => s.server_addr(), } } @@ -184,15 +210,17 @@ impl HttpSession { match self { Self::H1(s) => s.client_addr(), Self::H2(s) => s.client_addr(), + Self::H3(s) => s.client_addr(), } } /// Get the reference of the [Stream] that this HTTP/1 session is operating upon. 
- /// None if the HTTP session is over H2 + /// None if the HTTP session is over H2 or H3 pub fn stream(&self) -> Option<&Stream> { match self { Self::H1(s) => Some(s.stream()), Self::H2(_) => None, + Self::H3(_) => None, } } } diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs new file mode 100644 index 000000000..45bcdb851 --- /dev/null +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -0,0 +1,138 @@ +use crate::connectors::http::v2::ConnectionRef; +use crate::protocols::l4::socket::SocketAddr; +use crate::protocols::{Digest, UniqueIDType}; +use bytes::Bytes; +use h2::client::SendRequest; +use h2::SendStream; +use http::HeaderMap; +use pingora_http::{RequestHeader, ResponseHeader}; +use std::time::Duration; + +// FIXME: implement client H3Session +pub struct Http3Session { + pub read_timeout: Option, + send_req: SendRequest, + conn: ConnectionRef, +} + +impl Http3Session { + pub(crate) fn new(send_req: SendRequest, conn: ConnectionRef) -> Self { + Self { + read_timeout: None, + send_req, + conn, + } + } + + /// Write the request header to the server + pub fn write_request_header( + &mut self, + mut req: Box, + end: bool, + ) -> pingora_error::Result<()> { + todo!() + } + + /// Write a request body chunk + pub fn write_request_body(&mut self, data: Bytes, end: bool) -> pingora_error::Result<()> { + todo!() + } + + /// Signal that the request body has ended + pub fn finish_request_body(&mut self) -> pingora_error::Result<()> { + todo!() + } + + /// Read the response header + pub async fn read_response_header(&mut self) -> pingora_error::Result<()> { + todo!() + } + + /// Read the response body + /// + /// `None` means, no more body to read + pub async fn read_response_body(&mut self) -> pingora_error::Result> { + todo!() + } + + /// Whether the response has ended + pub fn response_finished(&self) -> bool { + todo!() + } + + /// Check whether stream finished with error. 
+ /// Like `response_finished`, but also attempts to poll the h2 stream for errors that may have + /// caused the stream to terminate, and returns them as `H2Error`s. + pub fn check_response_end_or_error(&mut self) -> pingora_error::Result { + todo!() + } + + /// Read the optional trailer headers + pub async fn read_trailers(&mut self) -> pingora_error::Result> { + todo!() + } + + /// The request header if it is already sent + pub fn request_header(&self) -> Option<&RequestHeader> { + todo!() + } + + /// The response header if it is already read + pub fn response_header(&self) -> Option<&ResponseHeader> { + todo!() + } + + /// Give up the http session abruptly. + pub fn shutdown(&mut self) { + todo!() + } + + /// Drop everything in this h2 stream. Return the connection ref. + /// After this function the underlying h2 connection should already notify the closure of this + /// stream so that another stream can be created if needed. + pub(crate) fn conn(&self) -> ConnectionRef { + todo!() + } + + /// Whether ping timeout occurred. After a ping timeout, the h2 connection will be terminated. + /// Ongoing h2 streams will receive an stream/connection error. The streams should check this + /// flag to tell whether the error is triggered by the timeout. + pub(crate) fn ping_timedout(&self) -> bool { + todo!() + } + + /// Return the [Digest] of the connection + /// + /// For reused connection, the timing in the digest will reflect its initial handshakes + /// The caller should check if the connection is reused to avoid misuse the timing field. + pub fn digest(&self) -> Option<&Digest> { + todo!() + } + + /// Return a mutable [Digest] reference for the connection + /// + /// Will return `None` if multiple H2 streams are open. + pub fn digest_mut(&mut self) -> Option<&mut Digest> { + todo!() + } + + /// Return the server (peer) address recorded in the connection digest. 
+ pub fn server_addr(&self) -> Option<&SocketAddr> { + todo!() + } + + /// Return the client (local) address recorded in the connection digest. + pub fn client_addr(&self) -> Option<&SocketAddr> { + todo!() + } + + /// the FD of the underlying connection + pub fn fd(&self) -> UniqueIDType { + todo!() + } + + /// take the body sender to another task to perform duplex read and write + pub fn take_request_body_writer(&mut self) -> Option> { + todo!() + } +} diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index 034f7b42f..bdd7c9431 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -21,6 +21,7 @@ use pingora_http::{RequestHeader, ResponseHeader}; use quiche::h3::{Header, NameValue}; use std::fmt::Debug; +pub mod client; pub mod nohash; pub mod server; diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index f1979a8af..5e3e8c08b 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -63,12 +63,6 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

{ - return Err(Error::explain( - ErrorType::InternalError, - "connection needs to be established, invalid state", - )) - } Connection::Established(state) => { let hconn = { let http3_config = if let Some(h3_options) = options { @@ -94,6 +88,12 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

{ + return Err(Error::explain( + ErrorType::InternalError, + "connection needs to be established, invalid state", + )) + } }; Ok(H3Connection { diff --git a/pingora-core/src/protocols/l4/ext.rs b/pingora-core/src/protocols/l4/ext.rs index 706fc7b4d..96d9a76f9 100644 --- a/pingora-core/src/protocols/l4/ext.rs +++ b/pingora-core/src/protocols/l4/ext.rs @@ -16,11 +16,13 @@ #![allow(non_camel_case_types)] +use crate::connectors::l4::BindTo; #[cfg(unix)] use libc::socklen_t; #[cfg(target_os = "linux")] use libc::{c_int, c_ulonglong, c_void}; use pingora_error::{Error, ErrorType::*, OrErr, Result}; +use socket2::Socket; use std::io::{self, ErrorKind}; use std::mem; use std::net::SocketAddr; @@ -31,10 +33,7 @@ use std::os::windows::io::{AsRawSocket, RawSocket}; use std::time::Duration; #[cfg(unix)] use tokio::net::UnixStream; -use tokio::net::{TcpSocket, TcpStream}; - -use crate::connectors::l4::BindTo; - +use tokio::net::{TcpSocket, TcpStream, UdpSocket}; /// The (copy of) the kernel struct tcp_info returns #[repr(C)] #[derive(Copy, Clone, Debug)] @@ -517,6 +516,100 @@ async fn inner_connect_with Result<()>>( .map_err(|e| wrap_os_connect_error(e, format!("Fail to connect to {}", *addr))) } +/// connect() to the given address while optionally binding to the specific source address and port range. +/// +/// The `set_socket` callback can be used to tune the socket before `connect()` is called. +/// +/// If a [`BindTo`] is set with a port range and fallback setting enabled this function will retry +/// on EADDRNOTAVAIL ignoring the port range. +/// +/// `IP_BIND_ADDRESS_NO_PORT` is used. +/// `IP_LOCAL_PORT_RANGE` is used if a port range is set on [`BindTo`]. 
+pub(crate) async fn connect_udp_with Result<()> + Clone>( + addr: &SocketAddr, + bind_to: Option<&BindTo>, + set_socket: F, +) -> Result { + if bind_to.as_ref().map_or(false, |b| b.will_fallback()) { + // if we see an EADDRNOTAVAIL error clear the port range and try again + let connect_result = inner_udp_connect_with(addr, bind_to, set_socket.clone()).await; + if let Err(e) = connect_result.as_ref() { + if matches!(e.etype(), BindError) { + let mut new_bind_to = BindTo::default(); + new_bind_to.addr = bind_to.as_ref().and_then(|b| b.addr); + // reset the port range + new_bind_to.set_port_range(None).unwrap(); + return inner_udp_connect_with(addr, Some(&new_bind_to), set_socket).await; + } + } + connect_result + } else { + // not retryable + inner_udp_connect_with(addr, bind_to, set_socket).await + } +} + +async fn inner_udp_connect_with Result<()>>( + addr: &SocketAddr, + bind_to: Option<&BindTo>, + set_socket: F, +) -> Result { + let socket = create_udp_socket(addr)?; + + #[cfg(unix)] + { + ip_bind_addr_no_port(socket.as_raw_fd(), true).or_err( + SocketError, + "failed to set socket opts IP_BIND_ADDRESS_NO_PORT", + )?; + + if let Some(bind_to) = bind_to { + if let Some((low, high)) = bind_to.port_range() { + ip_local_port_range(socket.as_raw_fd(), low, high) + .or_err(SocketError, "failed to set socket opts IP_LOCAL_PORT_RANGE")?; + } + + if let Some(baddr) = bind_to.addr { + socket + .bind(&baddr.into()) + .or_err_with(BindError, || format!("failed to bind to socket {}", baddr))?; + } + } + } + + #[cfg(windows)] + { + let default_addr = match addr { + SocketAddr::V4(_) => SocketAddr::new(std::net::Ipv4Addr::UNSPECIFIED.into(), 0), + SocketAddr::V6(_) => SocketAddr::new(std::net::Ipv6Addr::UNSPECIFIED.into(), 0), + }; + socket + .bind(&default_addr.into()) + .or_err(SocketError, "failed to create socket")?; + + if let Some(bind_to) = bind_to { + if let Some(baddr) = bind_to.addr { + socket + .bind(&baddr.into()) + .or_err_with(BindError, || format!("failed to 
bind to socket {}", baddr))?; + } + }; + } + // TODO: add support for bind on other platforms + + // socket bind() is required for UDP, needs to be done before converting to tokio socket + let socket = + UdpSocket::from_std(socket.into()).or_err(SocketError, "failed to create socket")?; + + set_socket(&socket)?; + + socket + .connect(*addr) + .await + .map_err(|e| wrap_os_connect_error(e, format!("Fail to connect to {}", *addr)))?; + Ok(socket) +} + /// connect() to the given address while optionally binding to the specific source address. /// /// `IP_BIND_ADDRESS_NO_PORT` is used @@ -577,6 +670,25 @@ pub fn set_tcp_keepalive(stream: &TcpStream, ka: &TcpKeepalive) -> Result<()> { set_keepalive(raw, ka).or_err(ConnectError, "failed to set keepalive") } +pub fn create_udp_socket(addr: &SocketAddr) -> Result { + let ty = socket2::Type::DGRAM; + let socket = match addr { + SocketAddr::V4(_) => Socket::new( + socket2::Domain::IPV4, + ty.nonblocking(), + Some(socket2::Protocol::UDP), + ), + SocketAddr::V6(_) => Socket::new( + socket2::Domain::IPV6, + ty.nonblocking(), + Some(socket2::Protocol::UDP), + ), + } + .or_err_with(BindError, || format!("fail to create socket {addr}"))?; + + Ok(socket) +} + #[cfg(test)] mod test { use super::*; @@ -621,4 +733,42 @@ mod test { // connect() return right away as the SYN goes out only when the first write() is called. 
assert!(connection_time.as_millis() < 4); } + + #[tokio::test] + async fn test_udp_connect() -> Result<()> { + use std::net::Ipv4Addr; + + let addr = "127.0.0.1:7745".parse().unwrap(); + let _remote = UdpSocket::bind(&addr); + + let socket_default = connect_udp_with(&addr, None, |socket| { + assert_eq!( + socket.local_addr().unwrap(), + SocketAddr::new(Ipv4Addr::UNSPECIFIED.into(), 0) + ); + Ok(()) + }) + .await?; + + assert_eq!(socket_default.peer_addr().unwrap(), addr); + Ok(()) + } + + #[tokio::test] + async fn test_udp_connect_bind_addr() -> Result<()> { + let addr = "127.0.0.1:7745".parse().unwrap(); + let _remote = UdpSocket::bind(&addr); + + let mut bind_to = BindTo::default(); + bind_to.addr = Some("127.0.0.1:7750".parse().unwrap()); + + let socket_bind = connect_udp_with(&addr, Some(&bind_to), |socket| { + assert_eq!(socket.local_addr().unwrap(), bind_to.addr.unwrap()); + Ok(()) + }) + .await?; + + assert_eq!(socket_bind.peer_addr().unwrap(), addr); + Ok(()) + } } diff --git a/pingora-core/src/protocols/l4/quic/connector.rs b/pingora-core/src/protocols/l4/quic/connector.rs new file mode 100644 index 000000000..5cc5f89d0 --- /dev/null +++ b/pingora-core/src/protocols/l4/quic/connector.rs @@ -0,0 +1,23 @@ +use crate::protocols::l4::quic::{detect_gso_pacing, SocketDetails}; +use crate::protocols::l4::quic::{Connection, OutgoingState}; +use pingora_error::{ErrorType, OrErr, Result}; +use std::sync::Arc; +use tokio::net::UdpSocket; + +impl Connection { + pub fn initiate_outgoing(io: UdpSocket) -> Result { + let addr = io.local_addr().explain_err(ErrorType::SocketError, |e| { + format!("failed to get local address from socket: {}", e) + })?; + + let (gso_enabled, pacing_enabled) = detect_gso_pacing(&io); + Ok(Self::Outgoing(OutgoingState { + socket_details: SocketDetails { + io: Arc::new(io), + addr, + gso_enabled, + pacing_enabled, + }, + })) + } +} diff --git a/pingora-core/src/protocols/l4/quic/listener.rs b/pingora-core/src/protocols/l4/quic/listener.rs 
index a32f22b2a..4ecb16fee 100644 --- a/pingora-core/src/protocols/l4/quic/listener.rs +++ b/pingora-core/src/protocols/l4/quic/listener.rs @@ -1,15 +1,13 @@ -use crate::protocols::l4::quic::sendto::{detect_gso, set_txtime_sockopt}; use crate::protocols::l4::quic::{ - Connection, ConnectionHandle, HandshakeResponse, IncomingHandle, IncomingState, SocketDetails, - UdpRecv, CONNECTION_DROP_DEQUE_INITIAL_SIZE, HANDSHAKE_PACKET_BUFFER_SIZE, MAX_IPV6_BUF_SIZE, - MAX_IPV6_QUIC_DATAGRAM_SIZE, + detect_gso_pacing, Connection, ConnectionHandle, Crypto, HandshakeResponse, IncomingHandle, + IncomingState, SocketDetails, UdpRecv, CONNECTION_DROP_DEQUE_INITIAL_SIZE, + HANDSHAKE_PACKET_BUFFER_SIZE, MAX_IPV6_BUF_SIZE, }; use log::{debug, error, trace, warn}; use parking_lot::Mutex; -use pingora_error::{BError, Error, ErrorType}; +use pingora_error::{BError, ErrorType, OrErr}; use quiche::{ConnectionId, Header, RecvInfo, Type}; use ring::hmac::Key; -use ring::rand::SystemRandom; use std::collections::{HashMap, VecDeque}; use std::fmt::{Debug, Formatter}; use std::io; @@ -41,10 +39,6 @@ pub struct Listener { drop_connections: Arc>>>, } -pub struct Crypto { - key: Key, -} - impl Listener { pub(crate) async fn accept( &mut self, @@ -289,31 +283,11 @@ impl TryFrom<(UdpSocket, QuicHttp3Configs)> for Listener { fn try_from( (io, configs): (UdpSocket, QuicHttp3Configs), ) -> pingora_error::Result { - let addr = io.local_addr().map_err(|e| { - Error::explain( - ErrorType::SocketError, - format!("failed to get local address from socket: {}", e), - ) - })?; - let rng = SystemRandom::new(); - let key = Key::generate(ring::hmac::HMAC_SHA256, &rng).map_err(|e| { - Error::explain( - ErrorType::InternalError, - format!("failed to generate listener key: {}", e), - ) + let addr = io.local_addr().explain_err(ErrorType::SocketError, |e| { + format!("failed to get local address from socket: {}", e) })?; - let gso_enabled = detect_gso(&io, MAX_IPV6_QUIC_DATAGRAM_SIZE); - let pacing_enabled = match 
set_txtime_sockopt(&io) { - Ok(_) => { - debug!("successfully set SO_TXTIME socket option"); - true - } - Err(e) => { - debug!("setsockopt failed {:?}", e); - false - } - }; + let (gso_enabled, pacing_enabled) = detect_gso_pacing(&io); Ok(Listener { socket_details: SocketDetails { @@ -324,7 +298,7 @@ impl TryFrom<(UdpSocket, QuicHttp3Configs)> for Listener { }, configs, - crypto: Crypto { key }, + crypto: Crypto::new()?, connections: Default::default(), drop_connections: Arc::new(Mutex::new(VecDeque::with_capacity( diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index 87f6a2509..b47d2de6a 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -4,6 +4,8 @@ use pingora_error::{Error, ErrorType, OrErr, Result}; use quiche::Connection as QuicheConnection; use quiche::{h3, Config}; use quiche::{ConnectionId, Header, RecvInfo, Stats}; +use ring::hmac::Key; +use ring::rand::SystemRandom; use std::collections::VecDeque; use std::fmt::{Debug, Formatter}; use std::net::SocketAddr; @@ -22,9 +24,11 @@ use tokio::task::JoinHandle; mod listener; mod sendto; +mod connector; pub(crate) mod id_token; + use crate::listeners::ALPN; -use crate::protocols::l4::quic::sendto::send_to; +use crate::protocols::l4::quic::sendto::{detect_gso, send_to, set_txtime_sockopt}; use crate::protocols::tls::{SslDigest, TlsRef}; use crate::protocols::{ConnectionState, Ssl}; pub(crate) use listener::Listener; @@ -57,13 +61,21 @@ const CONNECTION_DROP_DEQUE_INITIAL_SIZE: usize = 1024; /// and are transitioned to the [`Connection::Established`] / [`ConnectionHandle::Established`] /// variants once the TLS handshake was successful. 
pub enum Connection { - /// new connection during handshake + /// new outgoing connection while in handshake phase + Outgoing(OutgoingState), + /// new incoming connection while in handshake phase Incoming(IncomingState), /// transitioned once the handshake is successful ([`quiche::Connection::is_established`]) Established(EstablishedState), } -/// corresponds to a new connection before the handshake is completed +/// corresponds to a new outgoing (connector) connection before the handshake is completed +pub struct OutgoingState { + //pub(crate) connection_id: ConnectionId<'static>, + pub(crate) socket_details: SocketDetails, +} + +/// corresponds to a new incoming (listener) connection before the handshake is completed pub struct IncomingState { pub(crate) connection_id: ConnectionId<'static>, pub(crate) configs: QuicHttp3Configs, @@ -152,6 +164,24 @@ pub struct UdpRecv { pub(crate) recv_info: RecvInfo, } +/// cryptographic for generation and validation of connection ids +pub(crate) struct Crypto { + rng: SystemRandom, + key: Key, +} + +impl Crypto { + fn new() -> Result { + let rng = SystemRandom::new(); + let key = Key::generate(ring::hmac::HMAC_SHA256, &rng) + .explain_err(ErrorType::InternalError, |e| { + format!("failed to generate crypto key: {}", e) + })?; + + Ok(Self { rng, key }) + } +} + impl ConnectionHandle { fn establish(&mut self, handle: EstablishedHandle) { match self { @@ -210,7 +240,7 @@ impl Connection { let _ = mem::replace(self, Connection::Established(state)); Ok(()) } - Connection::Established(_) => Err(Error::explain( + _ => Err(Error::explain( ErrorType::InternalError, "establishing connection only possible on incoming connection", )), @@ -221,13 +251,14 @@ impl Connection { impl Drop for Connection { fn drop(&mut self) { match self { - Connection::Incoming(_) => {} Connection::Established(s) => { if !s.tx_handle.is_finished() { s.tx_handle.abort(); debug!("connection {:?} stopped tx task", s.connection_id); } } + // FIXME: handle outgoing 
(stopping rx loop) + _ => {} } } } @@ -398,6 +429,7 @@ impl Connection { pub(crate) fn local_addr(&self) -> io::Result { match self { Connection::Incoming(s) => s.socket_details.io.local_addr(), + Connection::Outgoing(s) => s.socket_details.io.local_addr(), Connection::Established(s) => s.socket.local_addr(), } } @@ -412,23 +444,23 @@ impl Ssl for Connection { /// Return the [`tls::SslDigest`] for logging fn get_ssl_digest(&self) -> Option> { match self { - Connection::Incoming(_) => None, Connection::Established(s) => { let mut conn = s.connection.lock(); let conn = &mut *conn; Some(Arc::from(SslDigest::from_ssl(conn.as_mut()))) } + _ => None, } } /// Return selected ALPN if any fn selected_alpn_proto(&self) -> Option { match self { - Connection::Incoming(_) => None, Connection::Established(s) => { let conn = s.connection.lock(); ALPN::from_wire_selected(conn.application_proto()) } + _ => None, } } } @@ -436,6 +468,7 @@ impl Ssl for Connection { impl AsRawFd for Connection { fn as_raw_fd(&self) -> RawFd { match self { + Connection::Outgoing(s) => s.socket_details.io.as_raw_fd(), Connection::Incoming(s) => s.socket_details.io.as_raw_fd(), Connection::Established(s) => s.socket.as_raw_fd(), } @@ -501,7 +534,22 @@ pub struct QuicHttp3Configs { } impl QuicHttp3Configs { - pub fn new_quic(cert_chain_pem_file: &str, priv_key_pem_file: &str) -> Result { + pub fn new_quic_connector(trust_origin_ca_pem: Option<&str>) -> Result { + let mut quic = Config::new(quiche::PROTOCOL_VERSION) + .explain_err(ErrorType::InternalError, |_| { + "Failed to create quiche config." + })?; + + if let Some(trust_origin_ca_pem) = trust_origin_ca_pem { + quic.load_verify_locations_from_file(trust_origin_ca_pem) + .explain_err(ErrorType::FileReadError, |_| { + "Could not load trust CA from pem file." 
+ })?; + }; + + Ok(quic) + } + pub fn new_quic_listener(cert_chain_pem_file: &str, priv_key_pem_file: &str) -> Result { let mut quic = Config::new(quiche::PROTOCOL_VERSION) .explain_err(ErrorType::InternalError, |_| { "Failed to create quiche config." @@ -578,7 +626,7 @@ impl QuicHttp3Configs { pub fn from_cert_key_paths(cert_chain_pem_file: &str, priv_key_pem_file: &str) -> Result { Ok(Self { - quic: Arc::new(Mutex::new(Self::new_quic( + quic: Arc::new(Mutex::new(Self::new_quic_listener( cert_chain_pem_file, priv_key_pem_file, )?)), @@ -618,3 +666,18 @@ impl Debug for QuicHttp3Configs { dbg.finish() } } + +fn detect_gso_pacing(io: &UdpSocket) -> (bool, bool) { + let gso_enabled = detect_gso(&io, MAX_IPV6_QUIC_DATAGRAM_SIZE); + let pacing_enabled = match set_txtime_sockopt(&io) { + Ok(_) => { + debug!("successfully set SO_TXTIME socket option"); + true + } + Err(e) => { + debug!("setsockopt failed {:?}", e); + false + } + }; + (gso_enabled, pacing_enabled) +} diff --git a/pingora-core/src/protocols/tls/quic/mod.rs b/pingora-core/src/protocols/tls/quic/mod.rs index 5c43bd153..2f23fdbd0 100644 --- a/pingora-core/src/protocols/tls/quic/mod.rs +++ b/pingora-core/src/protocols/tls/quic/mod.rs @@ -1,7 +1,7 @@ use crate::protocols::l4::quic::id_token::{mint_token, validate_token}; use crate::protocols::l4::quic::{ Connection, ConnectionTx, EstablishedHandle, EstablishedState, HandshakeResponse, - IncomingState, TxStats, MAX_IPV6_QUIC_DATAGRAM_SIZE, + IncomingState, OutgoingState, TxStats, MAX_IPV6_QUIC_DATAGRAM_SIZE, }; use crate::protocols::l4::stream::Stream as L4Stream; use crate::protocols::ConnectionState; @@ -24,8 +24,15 @@ pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result { + debug_assert!(false, "quic::handshake on already established connection"); + return Err(Error::explain( + ErrorType::HandshakeError, + "handshake state not of type incoming", + )); + } Connection::Incoming(i) => { - if let Some(e_state) = handshake_inner(i).await? 
{ + if let Some(e_state) = handshake_incoming(i).await? { // send HANDSHAKE_DONE Quic frame on established connection e_state.tx_notify.notify_waiters(); Some(e_state) @@ -37,12 +44,16 @@ pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result { - debug_assert!(false, "quic::handshake on already established connection"); - return Err(Error::explain( - ErrorType::HandshakeError, - "handshake state not of type incoming", - )); + Connection::Outgoing(o) => { + if let Some(_e_state) = handshake_outgoing(o).await? { + todo!(); + } else { + debug!( + "no handshake for connection", + //o.connection_id + ); + None + } } }; @@ -57,7 +68,7 @@ pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result pingora_error::Result> { let IncomingState { @@ -330,6 +341,12 @@ async fn handshake_inner( Ok(Some(state)) } +async fn handshake_outgoing( + _state: &mut OutgoingState, +) -> pingora_error::Result> { + Ok(None) +} + // connection io tx directly via socket async fn send_dgram( id: &ConnectionId<'_>, diff --git a/pingora-core/src/upstreams/peer.rs b/pingora-core/src/upstreams/peer.rs index 87bfab63d..1210e5428 100644 --- a/pingora-core/src/upstreams/peer.rs +++ b/pingora-core/src/upstreams/peer.rs @@ -203,6 +203,15 @@ pub trait Peer: Display + Clone { fn get_tracer(&self) -> Option { None } + + fn ip_proto(&self) -> IpProto { + IpProto::TCP + } +} + +pub enum IpProto { + TCP, + UDP, } /// A simple TCP or TLS peer without many complicated settings. 
From fd29b3f62f93746710066942c4696f0676e0955f Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Mon, 20 Jan 2025 21:50:29 +0100 Subject: [PATCH 29/52] initial Connector/Listener successful handshake --- pingora-core/src/connectors/http/v3.rs | 45 ++ pingora-core/src/connectors/l4.rs | 115 +++- pingora-core/src/connectors/mod.rs | 14 +- pingora-core/src/lib.rs | 1 + pingora-core/src/listeners/l4.rs | 3 +- pingora-core/src/listeners/mod.rs | 4 +- pingora-core/src/protocols/http/v3/server.rs | 4 +- pingora-core/src/protocols/l4/listener.rs | 2 +- .../src/protocols/l4/quic/connector.rs | 142 ++++- .../src/protocols/l4/quic/id_token.rs | 22 + .../src/protocols/l4/quic/listener.rs | 141 ++++- pingora-core/src/protocols/l4/quic/mod.rs | 522 ++++++++---------- pingora-core/src/protocols/mod.rs | 1 + pingora-core/src/protocols/tls/quic/client.rs | 168 ++++++ pingora-core/src/protocols/tls/quic/mod.rs | 393 +------------ pingora-core/src/protocols/tls/quic/server.rs | 361 ++++++++++++ pingora-core/src/upstreams/peer.rs | 10 + pingora-core/tests/test_basic.rs | 5 +- 18 files changed, 1239 insertions(+), 714 deletions(-) create mode 100644 pingora-core/src/protocols/tls/quic/client.rs create mode 100644 pingora-core/src/protocols/tls/quic/server.rs diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index 016190be6..0682a761e 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -5,6 +5,7 @@ use crate::connectors::http::v2::{ConnectionRef, InUsePool}; use crate::connectors::{ConnectorOptions, TransportConnector}; use crate::protocols::http::v2::client::Http2Session; use crate::protocols::http::v3::client::Http3Session; +use crate::protocols::l4::quic::Crypto; use crate::upstreams::peer::Peer; use pingora_pool::ConnectionPool; use std::sync::Arc; @@ -21,6 +22,7 @@ pub struct Connector { in_use_pool: InUsePool, // the h3 connection idle pool idle_pool: Arc>, + crypto: Option, } const 
DEFAULT_POOL_SIZE: usize = 128; @@ -31,10 +33,12 @@ impl Connector { .as_ref() .map_or(DEFAULT_POOL_SIZE, |o| o.keepalive_pool_size); // connection offload is handled by the [TransportConnector] + Self { transport: TransportConnector::new(options), idle_pool: Arc::new(ConnectionPool::new(pool_size)), in_use_pool: InUsePool::new(), + crypto: Crypto::new().ok(), } } @@ -90,4 +94,45 @@ impl Connector { ) { todo!() } + /* + /// Create a new Http3 connection to the given server + pub async fn new_http_session( + &self, + peer: &P, + ) -> Result { + let stream = self.transport.new_stream(peer).await?; + + // check alpn + match stream.selected_alpn_proto() { + Some(crate::protocols::tls::ALPN) => { /* continue */ } + Some(_) => { + // H2 not supported + return Ok(crate::protocols::http::client::HttpSession(Http1Session::new(stream))); + } + None => { + // if tls but no ALPN, default to h1 + // else if plaintext and min http version is 1, this is most likely h1 + if peer.tls() + || peer + .get_peer_options() + .map_or(true, |o| o.alpn.get_min_http_version() == 1) + { + return Ok(HttpSession::H1(Http1Session::new(stream))); + } + // else: min http version=H2 over plaintext, there is no ALPN anyways, we trust + // the caller that the server speaks h2c + } + } + let max_h2_stream = peer.get_peer_options().map_or(1, |o| o.max_h2_streams); + let conn = handshake(stream, max_h2_stream, peer.h2_ping_interval()).await?; + let h2_stream = conn + .spawn_stream() + .await? 
+ .expect("newly created connections should have at least one free stream"); + if conn.more_streams_allowed() { + self.in_use_pool.insert(peer.reuse_hash(), conn); + } + Ok(HttpSession::H2(h2_stream)) + } + */ } diff --git a/pingora-core/src/connectors/l4.rs b/pingora-core/src/connectors/l4.rs index 2635151b0..70fac0b96 100644 --- a/pingora-core/src/connectors/l4.rs +++ b/pingora-core/src/connectors/l4.rs @@ -102,16 +102,14 @@ where .err_context(|| format!("Fail to establish CONNECT proxy: {}", peer)); } let peer_ip_proto = peer.ip_proto(); - let peer_addr = peer.address(); - // FIXME: should return an Connection::Outgoing, pre-handshake - // needs to be Udp socket enabled, currently code only handles TCP let mut stream: Stream = if let Some(custom_l4) = peer.get_peer_options().and_then(|o| o.custom_l4.as_ref()) { custom_l4.connect(peer_addr).await? } else { + // FIXME: consider directly using peers ALPN setting for proto selection if matches!(peer_ip_proto, IpProto::UDP) { match peer_addr { SocketAddr::Inet(addr) => { @@ -149,7 +147,8 @@ where } }?; - Connection::initiate_outgoing(socket)?.into() + // FIXME: supply configs & default configs + Connection::initiate(socket, None)?.into() } SocketAddr::Unix(_addr) => { // TODO: tokio::net::UnixDatagram support could be an option @@ -640,3 +639,111 @@ mod tests { assert_eq!(bind_to.port_range, Some((1000, 2000))); } } + +#[cfg(test)] +mod quic_tests { + use crate::apps::http_app::ServeHttp; + use crate::connectors::l4::connect; + use crate::listeners::{Listeners, ALPN}; + use crate::prelude::HttpPeer; + use crate::protocols::http::ServerSession; + use crate::protocols::l4::quic::{Connection, QuicHttp3Configs, MAX_IPV6_BUF_SIZE}; + use crate::protocols::ConnectionState; + use crate::server::Server; + use crate::services::listening::Service; + use async_trait::async_trait; + use bytes::{BufMut, BytesMut}; + use http::{Response, StatusCode}; + use log::{debug, info}; + use pingora_error::Result; + use 
pingora_timeout::timeout; + use std::thread; + use std::time::Duration; + use crate::connectors::{do_connect, tls}; + + fn quic_listener() { + env_logger::builder() + .format_timestamp(Some(env_logger::TimestampPrecision::Nanos)) + .init(); + + let cert_path = format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR")); + let key_path = format!("{}/tests/keys/key.pem", env!("CARGO_MANIFEST_DIR")); + + let mut my_server = Server::new(None).unwrap(); + my_server.bootstrap(); + + let configs = QuicHttp3Configs::from_cert_key_paths(&cert_path, &key_path).unwrap(); + let listeners = Listeners::quic("0.0.0.0:6147", configs).unwrap(); + + let mut echo_service_http = + Service::with_listeners("Echo Service HTTP".to_string(), listeners, EchoApp); + echo_service_http.threads = Some(4); + + my_server.add_service(echo_service_http); + my_server.run_forever(); + } + + #[tokio::test] + async fn test_connector_quic_http3() -> Result<()> { + let _server_handle = thread::spawn(|| { + quic_listener(); + }); + info!("Startup completed.."); + + let port = "6147"; + let mut peer = Box::new(HttpPeer::new( + format!("127.0.0.1:{port}"), + false, + "openrusty.org".to_string(), + )); + peer.options.alpn = ALPN::H3; + + let mut pre_handshake_stream = connect(&*peer, None).await?; + assert!(pre_handshake_stream.quic_connection_state().is_some()); + + let tls_connector = tls::Connector::new(None); + let mut stream = do_connect(&*peer, None, None, &tls_connector.ctx).await?; + assert!(stream.quic_connection_state().is_some()); + + let connection = stream.quic_connection_state().unwrap(); + assert!(matches!(connection, Connection::OutgoingEstablished(_))); + Ok(()) + } + + #[derive(Clone)] + pub struct EchoApp; + #[async_trait] + impl ServeHttp for EchoApp { + async fn response(&self, http_stream: &mut ServerSession) -> Response> { + // read timeout of 2s + let read_timeout = 2000; + let body_future = async { + let mut body = BytesMut::with_capacity(MAX_IPV6_BUF_SIZE); + while let Ok(b) = 
http_stream.read_request_body().await { + match b { + None => break, // finished reading request + Some(b) => body.put(b), + } + } + if body.is_empty() { + body.put("no body!".as_bytes()); + } + body.freeze() + }; + + let body = match timeout(Duration::from_millis(read_timeout), body_future).await { + Ok(res) => res, + Err(_) => { + panic!("Timed out after {:?}ms", read_timeout); + } + }; + + Response::builder() + .status(StatusCode::OK) + .header(http::header::CONTENT_TYPE, "text/html") + .header(http::header::CONTENT_LENGTH, body.len()) + .body(body.to_vec()) + .unwrap() + } + } +} diff --git a/pingora-core/src/connectors/mod.rs b/pingora-core/src/connectors/mod.rs index e47b4795c..b35427788 100644 --- a/pingora-core/src/connectors/mod.rs +++ b/pingora-core/src/connectors/mod.rs @@ -24,7 +24,8 @@ mod tls; #[cfg(not(feature = "any_tls"))] use crate::tls::connectors as tls; -use crate::protocols::Stream; +use crate::protocols::tls::quic::client::handshake as quic_handshake; +use crate::protocols::{ConnectionState, Stream}; use crate::server::configuration::ServerConf; use crate::upstreams::peer::{Peer, ALPN}; @@ -328,9 +329,14 @@ async fn do_connect_inner( if peer.tls() { let tls_stream = tls::connect(stream, peer, alpn_override, tls_ctx).await?; Ok(Box::new(tls_stream)) - } - // FIXME:: call quic::handshake, return Connection::Established - else { + } else if stream.is_quic_connection() { + // TODO: use tls_ctx with boringssl & quiche + // currently tls_ctx is already built, but quiche only provides a Config::from_boring() + // accepting a SslContextBuilder, but calling only .build() on it, likely a SslContext + // should be possible when modifying quiche + let quic_stream = quic_handshake(stream, peer, alpn_override, tls_ctx).await?; + Ok(Box::new(quic_stream)) + } else { Ok(Box::new(stream)) } } diff --git a/pingora-core/src/lib.rs b/pingora-core/src/lib.rs index 4f3858083..610c7638d 100644 --- a/pingora-core/src/lib.rs +++ b/pingora-core/src/lib.rs @@ -40,6 
+40,7 @@ // This enables the feature that labels modules that are only available with // certain pingora features #![cfg_attr(docsrs, feature(doc_cfg))] +extern crate core; pub mod apps; pub mod connectors; diff --git a/pingora-core/src/listeners/l4.rs b/pingora-core/src/listeners/l4.rs index 068a03838..40c8db414 100644 --- a/pingora-core/src/listeners/l4.rs +++ b/pingora-core/src/listeners/l4.rs @@ -14,7 +14,8 @@ use crate::protocols::l4::ext::{create_udp_socket, set_dscp, set_tcp_fastopen_backlog}; use crate::protocols::l4::listener::Listener; -use crate::protocols::l4::quic::{Listener as QuicListener, QuicHttp3Configs}; +use crate::protocols::l4::quic::listener::Listener as QuicListener; +use crate::protocols::l4::quic::QuicHttp3Configs; pub use crate::protocols::l4::stream::Stream; use crate::protocols::TcpKeepalive; #[cfg(unix)] diff --git a/pingora-core/src/listeners/mod.rs b/pingora-core/src/listeners/mod.rs index 35ef9223e..a0c91e946 100644 --- a/pingora-core/src/listeners/mod.rs +++ b/pingora-core/src/listeners/mod.rs @@ -22,7 +22,7 @@ pub mod tls; #[cfg(not(feature = "any_tls"))] pub use crate::tls::listeners as tls; -use crate::protocols::tls::quic::handshake as quic_handshake; +use crate::protocols::tls::quic::server::handshake as quic_handshake; use crate::protocols::{tls::TlsRef, ConnectionState, Stream}; #[cfg(unix)] @@ -143,7 +143,7 @@ impl Listeners { /// Create a new [`Listeners`] with a QUIC server endpoint from the given string and /// according [`QuicHttp3Configs`]. 
- pub fn quic(&mut self, addr: &str, configs: QuicHttp3Configs) -> Result { + pub fn quic(addr: &str, configs: QuicHttp3Configs) -> Result { let mut listeners = Self::new(); listeners.add_address(ServerAddress::Udp( addr.into(), diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index 5e3e8c08b..56c9b42c9 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -63,7 +63,7 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

{ + Connection::IncomingEstablished(state) => { let hconn = { let http3_config = if let Some(h3_options) = options { h3_options @@ -72,7 +72,7 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

, + pub(crate) socket_details: SocketDetails, + pub(crate) crypto: Crypto, + pub(crate) configs: QuicHttp3Configs, +} + +/// can be used to wait for network data or trigger network sending +pub struct OutgoingEstablishedState { + pub(crate) connection_id: ConnectionId<'static>, + pub(crate) connection: Arc>, + + pub(crate) http3_config: Arc, + + /// is used to wait for new data received on the connection + /// (e.g. after [`quiche::h3::Connection.poll()`] returned [`quiche::h3::Error::Done`]) + pub(crate) rx_notify: Arc, + /// is used to trigger a transmit loop which sends all connection data until [`quiche::h3::Error::Done`] + pub(crate) tx_notify: Arc, + + pub(crate) socket: Arc, + /// handle for the ConnectionTx task + pub(crate) tx_handle: JoinHandle>, + /// handle for the ConnectionRx task + pub(crate) rx_handle: JoinHandle>, +} impl Connection { - pub fn initiate_outgoing(io: UdpSocket) -> Result { - let addr = io.local_addr().explain_err(ErrorType::SocketError, |e| { + pub fn initiate(io: UdpSocket, configs: Option) -> Result { + let local_addr = io.local_addr().explain_err(ErrorType::SocketError, |e| { format!("failed to get local address from socket: {}", e) })?; + let peer_addr = io.peer_addr().explain_err(ErrorType::SocketError, |e| { + format!("failed to get peer address from socket: {}", e) + })?; + + let configs = configs.unwrap_or(QuicHttp3Configs::try_from( + QuicHttp3Configs::new_quic_connector(None)?, + )?); let (gso_enabled, pacing_enabled) = detect_gso_pacing(&io); - Ok(Self::Outgoing(OutgoingState { + Ok(Self::OutgoingHandshake(OutgoingHandshakeState { + crypto: Crypto::new()?, socket_details: SocketDetails { io: Arc::new(io), - addr, + local_addr, + peer_addr: Some(peer_addr), gso_enabled, pacing_enabled, }, + configs, })) } } + +/// connections receive task receives data from the UDP socket to the [`quiche::Connection`] +/// the task notifies the `rx_notify` when data was received from network for the connection +pub struct ConnectionRx { +
pub(crate) socket_details: SocketDetails, + + pub(crate) connection_id: ConnectionId<'static>, + pub(crate) connection: Arc>, + + pub(crate) rx_notify: Arc, + pub(crate) tx_notify: Arc, +} + +impl ConnectionRx { + pub async fn start(self) -> Result<()> { + let socket = self.socket_details.io; + let local_addr = self.socket_details.local_addr; + let id = self.connection_id; + + // TODO: support ip switching on local & peer address + // would require socket re-binding + let mut buf = [0u8; MAX_IPV6_BUF_SIZE]; + debug!("connection {:?} rx read", id); + 'read: loop { + let (size, recv_info) = match socket.try_recv_from(&mut buf) { + Ok((size, from)) => { + trace!( + "connection {:?} network received from={} length={}", + id, + from, + size + ); + let recv_info = RecvInfo { + from, + to: local_addr, + }; + (size, recv_info) + } + Err(e) => { + if e.kind() == std::io::ErrorKind::WouldBlock { + socket + .readable() + .await + .explain_err(ErrorType::ReadError, |_| { + "failed to wait for readable network socket" + })?; + continue 'read; + } + return Err(e).explain_err(ErrorType::ReadError, |_| { + "failed to receive from network socket" + })?; + } + }; + { + let mut conn = self.connection.lock(); + match conn.recv(&mut buf[..size], recv_info) { + Ok(_size) => { + debug!("connection {:?} received {}", id, size); + self.tx_notify.notify_waiters(); + self.rx_notify.notify_waiters(); + } + Err(e) => { + return Err(e).explain_err(ErrorType::ReadError, |_| { + "failed to receive data from socket on connection" + }); + } + } + } + } + } +} + +impl Drop for OutgoingEstablishedState { + fn drop(&mut self) { + if !self.rx_handle.is_finished() { + self.rx_handle.abort(); + debug!("connection {:?} stopped rx task", self.connection_id) + } + if !self.tx_handle.is_finished() { + self.tx_handle.abort(); + debug!("connection {:?} stopped rx task", self.connection_id) + } + } +} diff --git a/pingora-core/src/protocols/l4/quic/id_token.rs b/pingora-core/src/protocols/l4/quic/id_token.rs 
index d460c323b..3b3067f4b 100644 --- a/pingora-core/src/protocols/l4/quic/id_token.rs +++ b/pingora-core/src/protocols/l4/quic/id_token.rs @@ -24,6 +24,10 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +use log::trace; +use quiche::{ConnectionId, Header}; +use ring::hmac::Key; +use ring::rand::{SecureRandom, SystemRandom}; use std::net; /// Generate a stateless retry token. @@ -84,3 +88,21 @@ pub(crate) fn validate_token<'a>( Some(quiche::ConnectionId::from_ref(&token[addr.len()..])) } + +pub(crate) fn generate_incoming_cid(key: &Key, hdr: &Header) -> ConnectionId<'static> { + let conn_id = ring::hmac::sign(key, &hdr.dcid); + let conn_id = conn_id.as_ref()[..quiche::MAX_CONN_ID_LEN].to_vec(); + // dcid + let conn_id = ConnectionId::from(conn_id); + trace!("generated incoming connection id {:?}", conn_id); + conn_id +} + +pub(crate) fn generate_outgoing_cid(rng: &SystemRandom) -> ConnectionId<'static> { + let mut conn_id = [0; quiche::MAX_CONN_ID_LEN]; + rng.fill(&mut conn_id[..]).unwrap(); + // scid + let conn_id = ConnectionId::from(conn_id.to_vec()); + trace!("generated outgoing connection id {:?}", conn_id); + conn_id +} diff --git a/pingora-core/src/protocols/l4/quic/listener.rs b/pingora-core/src/protocols/l4/quic/listener.rs index 4ecb16fee..3758ed7cd 100644 --- a/pingora-core/src/protocols/l4/quic/listener.rs +++ b/pingora-core/src/protocols/l4/quic/listener.rs @@ -1,31 +1,111 @@ +use crate::protocols::l4::quic::id_token::generate_incoming_cid; +use crate::protocols::l4::quic::QuicHttp3Configs; use crate::protocols::l4::quic::{ - detect_gso_pacing, Connection, ConnectionHandle, Crypto, HandshakeResponse, IncomingHandle, - IncomingState, SocketDetails, UdpRecv, CONNECTION_DROP_DEQUE_INITIAL_SIZE, + detect_gso_pacing, Connection, Crypto, SocketDetails, CONNECTION_DROP_DEQUE_INITIAL_SIZE, HANDSHAKE_PACKET_BUFFER_SIZE, MAX_IPV6_BUF_SIZE, }; use log::{debug, error, trace, 
warn}; use parking_lot::Mutex; use pingora_error::{BError, ErrorType, OrErr}; -use quiche::{ConnectionId, Header, RecvInfo, Type}; -use ring::hmac::Key; +use quiche::{h3, Connection as QuicheConnection, ConnectionId, Header, RecvInfo, Type}; use std::collections::{HashMap, VecDeque}; use std::fmt::{Debug, Formatter}; -use std::io; use std::io::ErrorKind; use std::net::SocketAddr; use std::os::fd::{AsRawFd, RawFd}; use std::sync::Arc; +use std::{io, mem}; use tokio::net::UdpSocket; -use tokio::sync::mpsc::channel; +use tokio::sync::mpsc::{channel, Receiver, Sender}; +use tokio::sync::Notify; +use tokio::task::JoinHandle; -use crate::protocols::l4::quic::QuicHttp3Configs; -use quiche::Connection as QuicheConnection; +/// corresponds to a new incoming (listener) connection before the handshake is completed +pub struct IncomingHandshakeState { + pub(crate) connection_id: ConnectionId<'static>, + pub(crate) configs: QuicHttp3Configs, + pub(crate) drop_connection: Arc>>>, + + pub(crate) socket_details: SocketDetails, + pub(crate) udp_rx: Receiver, + pub(crate) response: Arc>>, + + pub(crate) dgram: UdpRecv, + + pub(crate) ignore: bool, +} + +/// can be used to wait for network data or trigger network sending +pub struct IncomingEstablishedState { + pub(crate) connection_id: ConnectionId<'static>, + pub(crate) connection: Arc>, + + pub(crate) http3_config: Arc, + + /// is used to wait for new data received on the connection + /// (e.g. after [`quiche::h3::Connection.poll()`] returned [`quiche::h3::Error::Done`]) + pub(crate) rx_notify: Arc, + /// is used to trigger a transmit loop which sends all connection data until [`quiche::h3::Error::Done`] + pub(crate) tx_notify: Arc, -/// The [`Listener`] contains a [`HashMap`] linking [`quiche::ConnectionId`] to [`ConnectionHandle`] -/// the `Listener::accept` method returns [`Connection`]s and is responsible to forward network -/// UDP packets to the according `Connection` through the corresponding [`ConnectionHandle`]. 
+ pub(crate) socket: Arc, + /// handle for the ConnectionTx task + pub(crate) tx_handle: JoinHandle>, + pub(crate) drop_connection: Arc>>>, +} + +/// A [`IncomingConnectionHandle`] corresponds to a [`IncomingConnection`]. +/// For further details please refer to [`IncomingConnection`]. +pub enum IncomingConnectionHandle { + /// new connection handle during handshake + Handshake(HandshakeHandle), + /// transitioned once the handshake is successful ([`quiche::Connection::is_established`]) + Established(EstablishedHandle), +} + +impl Debug for IncomingConnectionHandle { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str("ConnectionHandle")?; + match self { + IncomingConnectionHandle::Handshake(_) => f.write_str("::Incoming"), + IncomingConnectionHandle::Established(_) => f.write_str("::Established"), + } + } +} + +/// used to forward data from the UDP socket during the handshake +pub struct HandshakeHandle { + udp_tx: Sender, + response: Arc>>, +} + +pub(crate) enum HandshakeResponse { + Established(EstablishedHandle), + Ignored, + // TODO: TimedOut +} + +/// is used to forward data from the UDP socket to the Quic connection +#[derive(Clone)] +pub struct EstablishedHandle { + pub(crate) connection_id: ConnectionId<'static>, + pub(crate) connection: Arc>, + pub(crate) rx_notify: Arc, + pub(crate) tx_notify: Arc, +} + +/// the message format used on the [`tokio::sync::mpsc::channel`] during the handshake phase +pub struct UdpRecv { + pub(crate) pkt: Vec, + pub(crate) header: Header<'static>, + pub(crate) recv_info: RecvInfo, +} + +/// The [`Listener`] contains a [`HashMap`] linking [`quiche::ConnectionId`] to [`IncomingConnectionHandle`] +/// the `Listener::accept` method returns [`IncomingConnection`]s and is responsible to forward network +/// UDP packets to the according `Connection` through the corresponding [`IncomingConnectionHandle`]. 
/// -/// In the [`ConnectionHandle::Incoming`] state the UDP packets are forwarded through a +/// In the [`IncomingConnectionHandle::Handshake`] state the UDP packets are forwarded through a /// [`tokio::sync::mpsc::channel`]. // Once the state is [`ConnectionHandle::Established`] the packets are directly received on // the [`quiche::Connection`]. @@ -35,7 +115,7 @@ pub struct Listener { configs: QuicHttp3Configs, crypto: Crypto, - connections: HashMap, ConnectionHandle>, + connections: HashMap, IncomingConnectionHandle>, drop_connections: Arc>>>, } @@ -92,7 +172,7 @@ impl Listener { // connection needs to be able to update source_ids() or destination_ids() let recv_info = RecvInfo { - to: self.socket_details.addr, + to: self.socket_details.local_addr, from, }; @@ -103,7 +183,7 @@ impl Listener { let mut handle; handle = self.connections.get_mut(&conn_id); if handle.is_none() { - conn_id = Self::gen_cid(&self.crypto.key, &header); + conn_id = generate_incoming_cid(&self.crypto.key, &header); handle = self.connections.get_mut(&conn_id); }; @@ -121,7 +201,7 @@ impl Listener { ); let mut needs_establish = None; match handle { - ConnectionHandle::Incoming(i) => { + IncomingConnectionHandle::Handshake(i) => { let resp; { resp = i.response.lock().take(); @@ -146,7 +226,7 @@ impl Listener { udp_tx = Some(i.udp_tx.clone()); } } - ConnectionHandle::Established(e) => { + IncomingConnectionHandle::Established(e) => { established_handle = Some(e.clone()); } } @@ -208,7 +288,7 @@ impl Listener { let response = Arc::new(Mutex::new(None)); debug!("new incoming connection {:?}", conn_id); - let connection = Connection::Incoming(IncomingState { + let connection = Connection::IncomingHandshake(IncomingHandshakeState { connection_id: conn_id.clone(), drop_connection: self.drop_connections.clone(), @@ -226,7 +306,7 @@ impl Listener { ignore: false, }); - let handle = ConnectionHandle::Incoming(IncomingHandle { udp_tx, response }); + let handle = 
IncomingConnectionHandle::Handshake(HandshakeHandle { udp_tx, response }); self.connections.insert(conn_id, handle); return Ok((connection.into(), from)); @@ -264,14 +344,6 @@ impl Listener { } } - fn gen_cid(key: &Key, hdr: &Header) -> ConnectionId<'static> { - let conn_id = ring::hmac::sign(key, &hdr.dcid); - let conn_id = conn_id.as_ref()[..quiche::MAX_CONN_ID_LEN].to_vec(); - let conn_id = ConnectionId::from(conn_id); - trace!("generated connection id {:?}", conn_id); - conn_id - } - pub(crate) fn get_raw_fd(&self) -> RawFd { self.socket_details.io.as_raw_fd() } @@ -292,7 +364,8 @@ impl TryFrom<(UdpSocket, QuicHttp3Configs)> for Listener { Ok(Listener { socket_details: SocketDetails { io: Arc::new(io), - addr, + local_addr: addr, + peer_addr: None, gso_enabled, pacing_enabled, }, @@ -315,3 +388,15 @@ impl Debug for Listener { .finish() } } + +impl IncomingConnectionHandle { + fn establish(&mut self, handle: EstablishedHandle) { + match self { + IncomingConnectionHandle::Handshake(_) => { + debug!("connection handle {:?} established", handle.connection_id); + let _ = mem::replace(self, IncomingConnectionHandle::Established(handle)); + } + IncomingConnectionHandle::Established(_) => {} + } + } +} diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index b47d2de6a..199a2b794 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -3,12 +3,13 @@ use parking_lot::Mutex; use pingora_error::{Error, ErrorType, OrErr, Result}; use quiche::Connection as QuicheConnection; use quiche::{h3, Config}; -use quiche::{ConnectionId, Header, RecvInfo, Stats}; +use quiche::{ConnectionId, Stats}; use ring::hmac::Key; use ring::rand::SystemRandom; -use std::collections::VecDeque; + use std::fmt::{Debug, Formatter}; use std::net::SocketAddr; + use std::os::fd::{AsRawFd, RawFd}; use std::pin::Pin; use std::sync::Arc; @@ -17,21 +18,20 @@ use std::{io, mem}; use tokio::io::{AsyncRead, 
AsyncWrite, ReadBuf}; use tokio::net::UdpSocket; use tokio::sync::mpsc::error::TryRecvError; -use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::Notify; -use tokio::task::JoinHandle; - -mod listener; -mod sendto; -mod connector; +pub(crate) mod connector; pub(crate) mod id_token; +pub(crate) mod listener; +mod sendto; use crate::listeners::ALPN; use crate::protocols::l4::quic::sendto::{detect_gso, send_to, set_txtime_sockopt}; use crate::protocols::tls::{SslDigest, TlsRef}; use crate::protocols::{ConnectionState, Ssl}; -pub(crate) use listener::Listener; + +use crate::protocols::l4::quic::connector::{OutgoingEstablishedState, OutgoingHandshakeState}; +use crate::protocols::l4::quic::listener::{IncomingEstablishedState, IncomingHandshakeState}; // UDP header 8 bytes, IPv4 Header 20 bytes //pub const MAX_IPV4_BUF_SIZE: usize = 65507; @@ -52,126 +52,44 @@ const HANDSHAKE_PACKET_BUFFER_SIZE: usize = 64; /// initial size for the connection drop deque const CONNECTION_DROP_DEQUE_INITIAL_SIZE: usize = 1024; -// TODO: potentially split more into separate modules -// as of now it is not fully clear which parts will be re-used for the [`Connector`] - -/// A [`Connection`] corresponds to a [`ConnectionHandle`]. +/// Represents a Quic [`Connection`] in either `Incoming` or `Outgoing` direction. /// -/// They are created having the variants [`Connection::Incoming`] / [`ConnectionHandle::Incoming`] -/// and are transitioned to the [`Connection::Established`] / [`ConnectionHandle::Established`] +/// A [`Connection`] of variant `Incoming*` corresponds to a [`IncomingConnectionHandle`]. +/// They are created having e.g. the variants [`Connection::IncomingHandshake`] / [`IncomingConnectionHandle::Handshake`] +/// and are transitioned to the [`Connection::IncomingEstablished`] / [`IncomingConnectionHandle::Established`] /// variants once the TLS handshake was successful. 
+/// +/// `Outgoing` connections do not have corresponding handles as they are bound to a distinguished +/// socket/quad-tuple and having a distinguished ConnectionRx task. pub enum Connection { - /// new outgoing connection while in handshake phase - Outgoing(OutgoingState), - /// new incoming connection while in handshake phase - Incoming(IncomingState), - /// transitioned once the handshake is successful ([`quiche::Connection::is_established`]) - Established(EstablishedState), -} - -/// corresponds to a new outgoing (connector) connection before the handshake is completed -pub struct OutgoingState { - //pub(crate) connection_id: ConnectionId<'static>, - pub(crate) socket_details: SocketDetails, -} - -/// corresponds to a new incoming (listener) connection before the handshake is completed -pub struct IncomingState { - pub(crate) connection_id: ConnectionId<'static>, - pub(crate) configs: QuicHttp3Configs, - pub(crate) drop_connection: Arc>>>, - - pub(crate) socket_details: SocketDetails, - pub(crate) udp_rx: Receiver, - pub(crate) response: Arc>>, - - pub(crate) dgram: UdpRecv, - - pub(crate) ignore: bool, + /// new incoming connection while in the handshake phase + IncomingHandshake(IncomingHandshakeState), + /// established incoming connection after successful handshake ([`quiche::Connection::is_established`]) + IncomingEstablished(IncomingEstablishedState), + + /// new outgoing connection while in the handshake phase + OutgoingHandshake(OutgoingHandshakeState), + /// established outgoing connection after successful handshake ([`quiche::Connection::is_established`]) + OutgoingEstablished(OutgoingEstablishedState), } #[derive(Clone)] pub(crate) struct SocketDetails { pub(crate) io: Arc, - addr: SocketAddr, + pub(crate) local_addr: SocketAddr, + pub(crate) peer_addr: Option, gso_enabled: bool, pacing_enabled: bool, } -/// can be used to wait for network data or trigger network sending -pub struct EstablishedState { - pub(crate) connection_id: 
ConnectionId<'static>, - pub(crate) connection: Arc>, - - pub(crate) http3_config: Arc, - - /// is used to wait for new data received on the connection - /// (e.g. after [`quiche::h3::Connection.poll()`] returned [`quiche::h3::Error::Done`]) - pub(crate) rx_notify: Arc, - /// is used to trigger a transmit loop which sends all connection data until [`quiche::h3::Error::Done`] - pub(crate) tx_notify: Arc, - - /// handle for the ConnectionTx task - pub(crate) tx_handle: JoinHandle>, - pub(crate) drop_connection: Arc>>>, - pub(crate) socket: Arc, -} - -/// A [`ConnectionHandle`] corresponds to a [`Connection`]. -/// For further details please refer to [`Connection`]. -pub enum ConnectionHandle { - /// new connection handle during handshake - Incoming(IncomingHandle), - /// transitioned once the handshake is successful ([`quiche::Connection::is_established`]) - Established(EstablishedHandle), -} - -impl Debug for ConnectionHandle { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.write_str("ConnectionHandle")?; - match self { - ConnectionHandle::Incoming(_) => f.write_str("::Incoming"), - ConnectionHandle::Established(_) => f.write_str("::Established"), - } - } -} - -/// used to forward data from the UDP socket during the handshake -pub struct IncomingHandle { - udp_tx: Sender, - response: Arc>>, -} - -pub(crate) enum HandshakeResponse { - Established(EstablishedHandle), - Ignored, - // TODO: TimedOut -} - -/// is used to forward data from the UDP socket to the Quic connection -#[derive(Clone)] -pub struct EstablishedHandle { - pub(crate) connection_id: ConnectionId<'static>, - pub(crate) connection: Arc>, - pub(crate) rx_notify: Arc, - pub(crate) tx_notify: Arc, -} - -/// the message format used on the [`tokio::sync::mpsc::channel`] during the handshake phase -pub struct UdpRecv { - pub(crate) pkt: Vec, - pub(crate) header: Header<'static>, - pub(crate) recv_info: RecvInfo, -} - /// cryptographic for generation and validation of connection ids 
pub(crate) struct Crypto { - rng: SystemRandom, + pub(crate) rng: SystemRandom, key: Key, } impl Crypto { - fn new() -> Result { + pub(crate) fn new() -> Result { let rng = SystemRandom::new(); let key = Key::generate(ring::hmac::HMAC_SHA256, &rng) .explain_err(ErrorType::InternalError, |e| { @@ -182,94 +100,13 @@ impl Crypto { } } -impl ConnectionHandle { - fn establish(&mut self, handle: EstablishedHandle) { - match self { - ConnectionHandle::Incoming(_) => { - debug!("connection handle {:?} established", handle.connection_id); - let _ = mem::replace(self, ConnectionHandle::Established(handle)); - } - ConnectionHandle::Established(_) => {} - } - } -} - -impl Connection { - pub(crate) fn establish(&mut self, state: EstablishedState) -> Result<()> { - if cfg!(test) { - let conn = state.connection.lock(); - debug_assert!( - conn.is_established() || conn.is_in_early_data(), - "connection must be established or ready for data" - ) - } - match self { - Connection::Incoming(s) => { - 'drain: loop { - match s.udp_rx.try_recv() { - Ok(mut dgram) => { - let mut conn = state.connection.lock(); - conn.recv(dgram.pkt.as_mut_slice(), dgram.recv_info) - .explain_err(ErrorType::HandshakeError, |_| { - "receiving dgram failed" - })?; - debug!( - "connection {:?} dgram received while establishing", - s.connection_id - ) - } - Err(e) => { - match e { - TryRecvError::Empty => { - // stop accepting packets - s.udp_rx.close(); - } - TryRecvError::Disconnected => { - // remote already closed channel - } - } - break 'drain; - } - } - } - debug_assert!( - s.udp_rx.is_empty(), - "udp rx channel must be empty when establishing the connection" - ); - debug!("connection {:?} established", state.connection_id); - let _ = mem::replace(self, Connection::Established(state)); - Ok(()) - } - _ => Err(Error::explain( - ErrorType::InternalError, - "establishing connection only possible on incoming connection", - )), - } - } -} - -impl Drop for Connection { - fn drop(&mut self) { - match self { - 
Connection::Established(s) => { - if !s.tx_handle.is_finished() { - s.tx_handle.abort(); - debug!("connection {:?} stopped tx task", s.connection_id); - } - } - // FIXME: handle outgoing (stopping rx loop) - _ => {} - } - } -} - /// connections transmit task sends data from the [`quiche::Connection`] to the UDP socket /// the actor is notified through the `tx_notify` and flushes all connection data to the network pub struct ConnectionTx { pub(crate) socket_details: SocketDetails, - pub(crate) connection: Arc>, pub(crate) connection_id: ConnectionId<'static>, + pub(crate) connection: Arc>, pub(crate) tx_notify: Arc, pub(crate) tx_stats: TxStats, @@ -425,105 +262,6 @@ impl TxStats { } } -impl Connection { - pub(crate) fn local_addr(&self) -> io::Result { - match self { - Connection::Incoming(s) => s.socket_details.io.local_addr(), - Connection::Outgoing(s) => s.socket_details.io.local_addr(), - Connection::Established(s) => s.socket.local_addr(), - } - } -} - -impl Ssl for Connection { - /// Return the TLS info if the connection is over TLS - fn get_ssl(&self) -> Option<&TlsRef> { - None - } - - /// Return the [`tls::SslDigest`] for logging - fn get_ssl_digest(&self) -> Option> { - match self { - Connection::Established(s) => { - let mut conn = s.connection.lock(); - let conn = &mut *conn; - Some(Arc::from(SslDigest::from_ssl(conn.as_mut()))) - } - _ => None, - } - } - - /// Return selected ALPN if any - fn selected_alpn_proto(&self) -> Option { - match self { - Connection::Established(s) => { - let conn = s.connection.lock(); - ALPN::from_wire_selected(conn.application_proto()) - } - _ => None, - } - } -} - -impl AsRawFd for Connection { - fn as_raw_fd(&self) -> RawFd { - match self { - Connection::Outgoing(s) => s.socket_details.io.as_raw_fd(), - Connection::Incoming(s) => s.socket_details.io.as_raw_fd(), - Connection::Established(s) => s.socket.as_raw_fd(), - } - } -} - -impl Debug for Connection { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - 
f.debug_struct("QuicConnection").finish() - } -} - -#[allow(unused_variables)] // TODO: remove -impl AsyncWrite for Connection { - fn poll_write( - self: Pin<&mut Self>, - cx: &mut Context<'_>, - buf: &[u8], - ) -> Poll> { - todo!() - } - - fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - // this is called on l4::Stream::drop() - Poll::Ready(Ok(())) - } - - fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - todo!() - } -} - -// TODO: consider usage for Quic/Connection/Datagrams -// is there be any source for data in this area (e.g. L4/UDP -> Quic/Dgram, Media Over Quic, ...) -#[allow(unused_variables)] -impl AsyncRead for Connection { - fn poll_read( - self: Pin<&mut Self>, - cx: &mut Context<'_>, - buf: &mut ReadBuf<'_>, - ) -> Poll> { - todo!() - } -} - -impl ConnectionState for Connection { - fn quic_connection_state(&mut self) -> Option<&mut Connection> { - Some(self) - } - - fn is_quic_connection(&self) -> bool { - true - } -} - /// contains configs for Quic [`quiche::Config`] and Http3 [`quiche::h3::Config`] /// /// the configs can be supplied during the [`crate::listeners::Listeners`] creation @@ -547,6 +285,13 @@ impl QuicHttp3Configs { })?; }; + quic.set_application_protos(h3::APPLICATION_PROTOCOL) + .explain_err(ErrorType::InternalError, |_| { + "Failed to set application protocols." 
+ })?; + + quic.grease(false); // default true + Ok(quic) } pub fn new_quic_listener(cert_chain_pem_file: &str, priv_key_pem_file: &str) -> Result { @@ -569,8 +314,8 @@ impl QuicHttp3Configs { // quic.verify_peer(); default server = false; client = true // quic.discover_pmtu(false); // default false quic.grease(false); // default true - // quic.log_keys() && config.set_keylog(); // logging SSL secrets - // quic.set_ticket_key() // session ticket signer key material + // quic.log_keys() && config.set_keylog(); // logging SSL secrets + // quic.set_ticket_key() // session ticket signer key material //config.enable_early_data(); // can lead to ZeroRTT headers during handshake @@ -681,3 +426,192 @@ fn detect_gso_pacing(io: &UdpSocket) -> (bool, bool) { }; (gso_enabled, pacing_enabled) } + +impl Connection { + pub(crate) fn establish_incoming(&mut self, state: IncomingEstablishedState) -> Result<()> { + if cfg!(test) { + let conn = state.connection.lock(); + debug_assert!( + conn.is_established() || conn.is_in_early_data(), + "connection must be established or ready for data" + ) + } + match self { + Connection::IncomingHandshake(s) => { + 'drain: loop { + match s.udp_rx.try_recv() { + Ok(mut dgram) => { + let mut conn = state.connection.lock(); + conn.recv(dgram.pkt.as_mut_slice(), dgram.recv_info) + .explain_err(ErrorType::HandshakeError, |_| { + "receiving dgram failed" + })?; + debug!( + "connection {:?} dgram received while establishing", + s.connection_id + ) + } + Err(e) => { + match e { + TryRecvError::Empty => { + // stop accepting packets + s.udp_rx.close(); + } + TryRecvError::Disconnected => { + // remote already closed channel + } + } + break 'drain; + } + } + } + debug_assert!( + s.udp_rx.is_empty(), + "udp rx channel must be empty when establishing the connection" + ); + debug!("connection {:?} established", state.connection_id); + let _ = mem::replace(self, Connection::IncomingEstablished(state)); + Ok(()) + } + _ => Err(Error::explain( + 
ErrorType::InternalError, + "establishing connection only possible on incoming handshake connection", + )), + } + } + + pub(crate) fn establish_outgoing(&mut self, state: OutgoingEstablishedState) -> Result<()> { + if cfg!(test) { + let conn = state.connection.lock(); + debug_assert!( + conn.is_established() || conn.is_in_early_data(), + "connection must be established or ready for data" + ) + } + match self { + Connection::OutgoingHandshake(_) => { + debug!("connection {:?} established", state.connection_id); + let _ = mem::replace(self, Connection::OutgoingEstablished(state)); + Ok(()) + } + _ => Err(Error::explain( + ErrorType::InternalError, + "establishing connection only possible on outgoing handshake connection", + )), + } + } + + pub(crate) fn local_addr(&self) -> io::Result { + match self { + Connection::IncomingHandshake(s) => s.socket_details.io.local_addr(), + Connection::IncomingEstablished(s) => s.socket.local_addr(), + Connection::OutgoingHandshake(s) => s.socket_details.io.local_addr(), + Connection::OutgoingEstablished(s) => s.socket.local_addr(), + } + } +} + +impl Debug for Connection { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("QuicConnection").finish() + } +} + +impl ConnectionState for Connection { + fn quic_connection_state(&mut self) -> Option<&mut Connection> { + Some(self) + } + + fn is_quic_connection(&self) -> bool { + true + } +} + +impl Drop for Connection { + fn drop(&mut self) { + match self { + Connection::IncomingEstablished(s) => { + if !s.tx_handle.is_finished() { + s.tx_handle.abort(); + debug!("connection {:?} stopped tx task", s.connection_id); + } + } + // FIXME: handle outgoing (stopping rx loop) + _ => {} + } + } +} + +impl Ssl for Connection { + /// Return the TLS info if the connection is over TLS + fn get_ssl(&self) -> Option<&TlsRef> { + None + } + + /// Return the [`tls::SslDigest`] for logging + fn get_ssl_digest(&self) -> Option> { + match self { + 
Connection::IncomingEstablished(s) => { + let mut conn = s.connection.lock(); + let conn = &mut *conn; + Some(Arc::from(SslDigest::from_ssl(conn.as_mut()))) + } + _ => None, + } + } + + /// Return selected ALPN if any + fn selected_alpn_proto(&self) -> Option { + match self { + Connection::IncomingEstablished(s) => { + let conn = s.connection.lock(); + ALPN::from_wire_selected(conn.application_proto()) + } + _ => None, + } + } +} + +impl AsRawFd for Connection { + fn as_raw_fd(&self) -> RawFd { + match self { + Connection::IncomingHandshake(s) => s.socket_details.io.as_raw_fd(), + Connection::IncomingEstablished(s) => s.socket.as_raw_fd(), + Connection::OutgoingHandshake(s) => s.socket_details.io.as_raw_fd(), + Connection::OutgoingEstablished(s) => s.socket.as_raw_fd(), + } + } +} + +#[allow(unused_variables)] // TODO: remove +impl AsyncWrite for Connection { + fn poll_write( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + todo!() + } + + fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + // this is called on l4::Stream::drop() + Poll::Ready(Ok(())) + } + + fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + todo!() + } +} + +// TODO: consider usage for Quic/Connection/Datagrams +// is there be any source for data in this area (e.g. L4/UDP -> Quic/Dgram, Media Over Quic, ...) 
+#[allow(unused_variables)] +impl AsyncRead for Connection { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + todo!() + } +} diff --git a/pingora-core/src/protocols/mod.rs b/pingora-core/src/protocols/mod.rs index 60a903dd6..163363f62 100644 --- a/pingora-core/src/protocols/mod.rs +++ b/pingora-core/src/protocols/mod.rs @@ -57,6 +57,7 @@ pub trait ConnectionState { fn quic_connection_state(&mut self) -> Option<&mut Connection> { None } + fn is_quic_connection(&self) -> bool { false } diff --git a/pingora-core/src/protocols/tls/quic/client.rs b/pingora-core/src/protocols/tls/quic/client.rs new file mode 100644 index 000000000..6596457d1 --- /dev/null +++ b/pingora-core/src/protocols/tls/quic/client.rs @@ -0,0 +1,168 @@ +use crate::listeners::ALPN; +use crate::protocols::l4::quic::connector::{ + ConnectionRx, OutgoingEstablishedState, OutgoingHandshakeState, +}; +use crate::protocols::l4::quic::id_token::generate_outgoing_cid; +use crate::protocols::l4::quic::{Connection, ConnectionTx, TxStats}; +use crate::protocols::tls::quic::handle_connection_errors; +use crate::protocols::IO; +use crate::upstreams::peer::Peer; +use log::{info, trace}; +use parking_lot::Mutex; +use pingora_boringssl::ssl::SslConnector; +use pingora_error::ErrorType::HandshakeError; +use pingora_error::{Error, ErrorType, OrErr}; +use std::sync::Arc; +use tokio::sync::Notify; + +pub(crate) async fn handshake( + mut stream: T, + peer: &P, + alpn_override: Option, + tls_ctx: &SslConnector, +) -> pingora_error::Result +where + T: IO, + P: Peer + Send + Sync, +{ + let Some(connection) = stream.quic_connection_state() else { + debug_assert!(false, "quic::handshake called on stream of another type"); + return Err(Error::explain( + ErrorType::InternalError, + "stream is not a quic stream", + )); + }; + + let e_state = match connection { + Connection::IncomingHandshake(_) | Connection::IncomingEstablished(_) => { + debug_assert!(false, "client 
handshake on server connection"); + return Err(Error::explain( + ErrorType::InternalError, + "client handshake on server connection", + )); + } + Connection::OutgoingEstablished(_) => { + debug_assert!(false, "handshake on already established connection"); + return Err(Error::explain( + ErrorType::InternalError, + "handshake state already established", + )); + } + Connection::OutgoingHandshake(o) => { + handshake_outgoing(o, peer, alpn_override, tls_ctx).await? + } + }; + + connection.establish_outgoing(e_state)?; + Ok(stream) +} + +pub(crate) async fn handshake_outgoing

( + state: &mut OutgoingHandshakeState, + peer: &P, + alpn_override: Option, + tls_ctx: &SslConnector, +) -> pingora_error::Result +where + P: Peer + Send + Sync, +{ + let OutgoingHandshakeState { + crypto, + socket_details, + configs, + } = state; + + let conn_id = generate_outgoing_cid(&crypto.rng); + + let local_addr = socket_details.local_addr; + let Some(peer_addr) = socket_details.peer_addr else { + return Err(Error::explain( + HandshakeError, + "peer address for outgoing connection not present", + )); + }; + + let conn = { + let mut config = configs.quic().lock(); + // Create a QUIC connection and initiate handshake. + quiche::connect( + Some(peer.sni()), + &conn_id, + local_addr, + peer_addr, + &mut config, + ) + .explain_err(HandshakeError, |e| { + format!("failed to generate initial handshake packet {:?}", e) + })? + }; + info!( + "connection {:?} outgoing from {:} to {:}", + conn_id, local_addr, peer_addr + ); + + let max_udp_payload_size = conn.max_send_udp_payload_size(); + let connection = Arc::new(Mutex::new(conn)); + + let tx_notify = Arc::new(Notify::new()); + let rx_notify = Arc::new(Notify::new()); + + // starting connection IO + let tx = ConnectionTx { + socket_details: socket_details.clone(), + connection_id: conn_id.clone(), + connection: connection.clone(), + tx_notify: tx_notify.clone(), + tx_stats: TxStats::new(max_udp_payload_size), + }; + let rx = ConnectionRx { + socket_details: socket_details.clone(), + connection_id: conn_id.clone(), + connection: connection.clone(), + rx_notify: rx_notify.clone(), + tx_notify: tx_notify.clone(), + }; + + let e_state = OutgoingEstablishedState { + connection_id: conn_id.clone(), + connection: connection.clone(), + http3_config: configs.http3().clone(), + + socket: socket_details.io.clone(), + rx_notify: rx_notify.clone(), + tx_notify: tx_notify.clone(), + + rx_handle: tokio::task::spawn(rx.start()), + tx_handle: tokio::task::spawn(tx.start()), + }; + + // starting the ConnectionTx task sent the initial 
handshake packet + loop { + // wait for the response + rx_notify.notified().await; + { + let conn = connection.lock(); + + trace!("connection {:?} established={}, early_data={}, closed={}, draining={}, readable={}, timed_out={}, resumed={}", + conn_id, conn.is_established(), conn.is_in_early_data(), conn.is_closed(), + conn.is_draining(), conn.is_readable(), conn.is_timed_out(), conn.is_resumed()); + trace!( + "connection {:?} peer_error={:?}, local_error={:?}", + conn_id, + conn.peer_error(), + conn.local_error() + ); + + handle_connection_errors(conn_id.clone(), conn.peer_error(), conn.local_error())?; + if conn.is_established() { + // send HANDSHAKE_DONE Quic frame on established connection + e_state.tx_notify.notify_waiters(); + break; + } + } + // send connection data on ConnectionTx task to continue handshake + tx_notify.notify_waiters(); + } + + Ok(e_state) +} diff --git a/pingora-core/src/protocols/tls/quic/mod.rs b/pingora-core/src/protocols/tls/quic/mod.rs index 2f23fdbd0..136f1d0f7 100644 --- a/pingora-core/src/protocols/tls/quic/mod.rs +++ b/pingora-core/src/protocols/tls/quic/mod.rs @@ -1,380 +1,33 @@ -use crate::protocols::l4::quic::id_token::{mint_token, validate_token}; -use crate::protocols::l4::quic::{ - Connection, ConnectionTx, EstablishedHandle, EstablishedState, HandshakeResponse, - IncomingState, OutgoingState, TxStats, MAX_IPV6_QUIC_DATAGRAM_SIZE, -}; -use crate::protocols::l4::stream::Stream as L4Stream; -use crate::protocols::ConnectionState; -use log::{debug, error, trace, warn}; -use parking_lot::Mutex; -use pingora_error::{Error, ErrorType, OrErr}; +use log::error; +use pingora_error::ErrorType::HandshakeError; +use pingora_error::OrErr; use quiche::ConnectionId; -use std::net::SocketAddr; -use std::sync::Arc; -use tokio::net::UdpSocket; -use tokio::sync::Notify; -pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result { - let Some(connection) = stream.quic_connection_state() else { - debug_assert!(false, 
"quic::handshake called on stream of another type"); - return Err(Error::explain( - ErrorType::InternalError, - "stream is not a quic stream", - )); - }; - - let e_state = match connection { - Connection::Established(_) => { - debug_assert!(false, "quic::handshake on already established connection"); - return Err(Error::explain( - ErrorType::HandshakeError, - "handshake state not of type incoming", - )); - } - Connection::Incoming(i) => { - if let Some(e_state) = handshake_incoming(i).await? { - // send HANDSHAKE_DONE Quic frame on established connection - e_state.tx_notify.notify_waiters(); - Some(e_state) - } else { - debug!( - "handshake either rejected or ignored for connection {:?}", - i.connection_id - ); - None - } - } - Connection::Outgoing(o) => { - if let Some(_e_state) = handshake_outgoing(o).await? { - todo!(); - } else { - debug!( - "no handshake for connection", - //o.connection_id - ); - None - } - } - }; - - if let Some(e_state) = e_state { - connection.establish(e_state)?; - Ok(stream) - } else { - Err(Error::explain( - ErrorType::HandshakeError, - "handshake rejected or ignored", - )) - } -} - -async fn handshake_incoming( - state: &mut IncomingState, -) -> pingora_error::Result> { - let IncomingState { - connection_id: conn_id, - configs, - drop_connection, - - socket_details, - udp_rx, - dgram, - - response, - - ignore, - } = state; - - if *ignore { - { - let mut resp = response.lock(); - *resp = Some(HandshakeResponse::Ignored) - } - return Ok(None); - } - - let socket = &socket_details.io; - let initial_dcid = dgram.header.dcid.clone(); - let mut out = [0u8; MAX_IPV6_QUIC_DATAGRAM_SIZE]; - - if !quiche::version_is_supported(dgram.header.version) { - warn!("Quic packet version received is not supported. 
Negotiating version..."); - let size = quiche::negotiate_version(&dgram.header.scid, &dgram.header.dcid, &mut out) - .explain_err(ErrorType::HandshakeError, |_| { - "creating version negotiation packet failed" - })?; - - // send data to network - send_dgram(conn_id, socket, &out[..size], dgram.recv_info.from) - .await - .explain_err(ErrorType::WriteError, |_| { - "sending version negotiation packet failed" - })?; - - // validate response - if let Some(resp_dgram) = udp_rx.recv().await { - if quiche::version_is_supported(resp_dgram.header.version) { - *dgram = resp_dgram - } else { - return Err(Error::explain( - ErrorType::HandshakeError, - "version negotiation failed as responded version is not supported", - )); - }; - } else { - return Err(Error::explain( - ErrorType::HandshakeError, - "version negotiation did not receive a response", - )); - } - }; - - // token is always present in "Initial" packets - let token = dgram.header.token.as_ref().unwrap(); - // do stateless retry if the client didn't send a token - if token.is_empty() { - trace!( - "connection {:?} stateless retry as Quic header token is empty", - conn_id - ); - - let hdr = &dgram.header; - let new_token = mint_token(hdr, &dgram.recv_info.from); - let size = quiche::retry( - &hdr.scid, - &hdr.dcid, +pub mod client; +pub mod server; + +fn handle_connection_errors( + conn_id: ConnectionId<'_>, + local_error: Option<&quiche::ConnectionError>, + peer_error: Option<&quiche::ConnectionError>, +) -> pingora_error::Result<()> { + if let Some(e) = local_error { + error!( + "connection {:?} local error reason: {}", conn_id, - &new_token, - hdr.version, - &mut out, - ) - .explain_err(ErrorType::HandshakeError, |_| { - "creating retry packet failed" - })?; - - send_dgram(conn_id, socket, &out[..size], dgram.recv_info.from) - .await - .explain_err(ErrorType::WriteError, |_| "sending retry packet failed")?; - - // validate response - if let Some(resp_dgram) = udp_rx.recv().await { - // token is always present in 
"Initial" packets - let resp_token = resp_dgram.header.token.as_ref().unwrap(); - if resp_token.is_empty() { - return Err(Error::explain( - ErrorType::HandshakeError, - "Stateless retry failed. Still no token available after stateless retry." - .to_string(), - )); - } else { - *dgram = resp_dgram; - }; - } else { - return Err(Error::explain( - ErrorType::HandshakeError, - "Stateless retry did not receive a response.".to_string(), - )); - } - } - - let hdr = &dgram.header; - let token = hdr.token.as_ref().unwrap(); - let odcid = validate_token(&dgram.recv_info.from, token); - - // The token was not valid, meaning the retry failed, so drop the connection. - if odcid.is_none() { - return Err(Error::explain( - ErrorType::HandshakeError, - "Quic header has invalid address validation token.".to_string(), - )); - } - - // The destination id was not valid, so drop the connection. - if conn_id.len() != hdr.dcid.len() { - return Err(Error::explain( - ErrorType::HandshakeError, - "Quic header has invalid destination connection id.".to_string(), - )); - } - - // Reuse the source connection ID we sent in the Retry packet, - // instead of changing it again. 
- debug!( - "new connection {:?} odcid={:?} scid={:?} ", - hdr.dcid, initial_dcid, hdr.scid - ); - - let mut conn; - { - let mut config = configs.quic().lock(); - conn = quiche::accept( - &hdr.dcid, - Some(&initial_dcid), - dgram.recv_info.to, - dgram.recv_info.from, - &mut config, - ) - .explain_err(ErrorType::HandshakeError, |_| { - "connection instantiation failed" - })?; + String::from_utf8_lossy(e.reason.as_slice()).to_string() + ); + return Err(e).explain_err(HandshakeError, |_| "local error during handshake"); } - // receive quic data into connection - let buf = dgram.pkt.as_mut_slice(); - conn.recv(buf, dgram.recv_info) - .explain_err(ErrorType::HandshakeError, |_| { - "receiving initial data failed" - })?; - - debug!("connection {:?} starting handshake", conn_id); - // RSA handshake requires more than one packet - while !conn.is_established() { - trace!("connection {:?} creating handshake packet", conn_id); - 'tx: loop { - let (size, info) = match conn.send(out.as_mut_slice()) { - Ok((size, info)) => (size, info), - Err(quiche::Error::Done) => break 'tx, - Err(e) => { - return Err(e).explain_err(ErrorType::WriteError, |_| { - "creating handshake packet failed" - }) - } - }; - - trace!("connection {:?} sending handshake packet", conn_id); - send_dgram(conn_id, socket, &out[..size], info.to) - .await - .explain_err(ErrorType::WriteError, |_| "sending handshake packet failed")?; - } - - trace!("connection {:?} waiting for handshake response", conn_id); - 'rx: loop { - if let Some(mut dgram) = udp_rx.recv().await { - trace!("connection {:?} received handshake response", conn_id); - conn.recv(dgram.pkt.as_mut_slice(), dgram.recv_info) - .explain_err(ErrorType::HandshakeError, |_| { - "receiving handshake response failed" - })?; - } else { - return Err(Error::explain( - ErrorType::HandshakeError, - "finishing handshake failed, did not receive a response", - )); - } - if udp_rx.is_empty() { - break 'rx; - } - } - - trace!("connection {:?} established={}, 
early_data={}, closed={}, draining={}, readable={}, timed_out={}, resumed={}", - conn_id, conn.is_established(), conn.is_in_early_data(), conn.is_closed(), - conn.is_draining(), conn.is_readable(), conn.is_timed_out(), conn.is_resumed()); - - trace!( - "connection {:?} peer_error={:?}, local_error={:?}", + if let Some(e) = peer_error { + error!( + "connection {:?} peer error reason: {}", conn_id, - conn.peer_error(), - conn.local_error() + String::from_utf8_lossy(e.reason.as_slice()).to_string() ); - if let Some(e) = conn.peer_error() { - error!( - "connection {:?} peer error reason: {}", - conn_id, - String::from_utf8_lossy(e.reason.as_slice()).to_string() - ); - } - if let Some(e) = conn.local_error() { - error!( - "connection {:?} local error reason: {}", - conn_id, - String::from_utf8_lossy(e.reason.as_slice()).to_string() - ); - } - } - - let max_send_udp_payload_size = conn.max_send_udp_payload_size(); - let connection_id = conn_id; - let connection = Arc::new(Mutex::new(conn)); - let tx_notify = Arc::new(Notify::new()); - let rx_notify = Arc::new(Notify::new()); - - debug!( - "connection {:?} handshake successful, udp_rx {}", - connection_id, - udp_rx.len() - ); - let handle = EstablishedHandle { - connection_id: connection_id.clone(), - connection: connection.clone(), - rx_notify: rx_notify.clone(), - tx_notify: tx_notify.clone(), - }; - - { - let mut resp = response.lock(); - *resp = Some(HandshakeResponse::Established(handle)); + return Err(e).explain_err(HandshakeError, |_| "peer error during handshake"); } - let tx = ConnectionTx { - socket_details: socket_details.clone(), - connection_id: connection_id.clone(), - connection: connection.clone(), - - tx_notify: tx_notify.clone(), - tx_stats: TxStats::new(max_send_udp_payload_size), - }; - - let state = EstablishedState { - connection_id: connection_id.clone(), - connection: connection.clone(), - - http3_config: configs.http3().clone(), - - rx_notify: rx_notify.clone(), - tx_notify: tx_notify.clone(), - - 
tx_handle: tokio::spawn(tx.start()), - drop_connection: drop_connection.clone(), - socket: socket.clone(), - }; - - Ok(Some(state)) -} - -async fn handshake_outgoing( - _state: &mut OutgoingState, -) -> pingora_error::Result> { - Ok(None) -} - -// connection io tx directly via socket -async fn send_dgram( - id: &ConnectionId<'_>, - io: &Arc, - buf: &[u8], - to: SocketAddr, -) -> pingora_error::Result { - match io.send_to(buf, &to).await { - Ok(sent) => { - debug_assert_eq!( - sent, - buf.len(), - "amount of network sent data does not correspond to packet size" - ); - trace!( - "connection {:?} sent dgram to={:?} length={:?} ", - id, - to, - buf.len() - ); - Ok(sent) - } - Err(e) => { - error!("Failed sending packet via UDP. Error: {:?}", e); - Err(Error::explain( - ErrorType::WriteError, - format!("Failed sending packet via UDP. Error: {:?}", e), - )) - } - } + Ok(()) } diff --git a/pingora-core/src/protocols/tls/quic/server.rs b/pingora-core/src/protocols/tls/quic/server.rs new file mode 100644 index 000000000..bd565c9bf --- /dev/null +++ b/pingora-core/src/protocols/tls/quic/server.rs @@ -0,0 +1,361 @@ +use crate::protocols::l4::quic::id_token::{mint_token, validate_token}; +use crate::protocols::l4::quic::listener::{ + EstablishedHandle, HandshakeResponse, IncomingEstablishedState, IncomingHandshakeState, +}; +use crate::protocols::l4::quic::{Connection, ConnectionTx, TxStats, MAX_IPV6_QUIC_DATAGRAM_SIZE}; +use crate::protocols::l4::stream::Stream as L4Stream; +use crate::protocols::tls::quic::handle_connection_errors; +use crate::protocols::ConnectionState; +use log::{debug, error, trace, warn}; +use parking_lot::Mutex; +use pingora_error::{Error, ErrorType, OrErr}; +use quiche::ConnectionId; +use std::net::SocketAddr; +use std::sync::Arc; +use tokio::net::UdpSocket; +use tokio::sync::Notify; + +pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result { + let Some(connection) = stream.quic_connection_state() else { + debug_assert!(false, 
"quic::handshake called on stream of another type"); + return Err(Error::explain( + ErrorType::InternalError, + "stream is not a quic stream", + )); + }; + + let e_state = match connection { + Connection::IncomingEstablished(_) => { + debug_assert!(false, "quic::handshake on already established connection"); + return Err(Error::explain( + ErrorType::InternalError, + "handshake state already established", + )); + } + Connection::IncomingHandshake(i) => { + if let Some(e_state) = handshake_incoming(i).await? { + // send HANDSHAKE_DONE Quic frame on established connection + e_state.tx_notify.notify_waiters(); + Some(e_state) + } else { + debug!( + "handshake either rejected or ignored for connection {:?}", + i.connection_id + ); + None + } + } + Connection::OutgoingHandshake(_) | Connection::OutgoingEstablished(_) => { + debug_assert!(false, "server handshake on client connection"); + return Err(Error::explain( + ErrorType::InternalError, + "server handshake on client connection", + )); + } + }; + + if let Some(e_state) = e_state { + connection.establish_incoming(e_state)?; + Ok(stream) + } else { + Err(Error::explain( + ErrorType::HandshakeError, + "handshake rejected or ignored", + )) + } +} + +async fn handshake_incoming( + state: &mut IncomingHandshakeState, +) -> pingora_error::Result> { + let IncomingHandshakeState { + connection_id: conn_id, + configs, + drop_connection, + + socket_details, + udp_rx, + dgram, + + response, + + ignore, + } = state; + + if *ignore { + { + let mut resp = response.lock(); + *resp = Some(HandshakeResponse::Ignored) + } + return Ok(None); + } + + let socket = &socket_details.io; + let initial_dcid = dgram.header.dcid.clone(); + let mut out = [0u8; MAX_IPV6_QUIC_DATAGRAM_SIZE]; + + if !quiche::version_is_supported(dgram.header.version) { + warn!("Quic packet version received is not supported. 
Negotiating version..."); + let size = quiche::negotiate_version(&dgram.header.scid, &dgram.header.dcid, &mut out) + .explain_err(ErrorType::HandshakeError, |_| { + "creating version negotiation packet failed" + })?; + + // send data to network + send_dgram(conn_id, socket, &out[..size], dgram.recv_info.from) + .await + .explain_err(ErrorType::WriteError, |_| { + "sending version negotiation packet failed" + })?; + + // validate response + if let Some(resp_dgram) = udp_rx.recv().await { + if quiche::version_is_supported(resp_dgram.header.version) { + *dgram = resp_dgram + } else { + return Err(Error::explain( + ErrorType::HandshakeError, + "version negotiation failed as responded version is not supported", + )); + }; + } else { + return Err(Error::explain( + ErrorType::HandshakeError, + "version negotiation did not receive a response", + )); + } + }; + + // token is always present in "Initial" packets + let token = dgram.header.token.as_ref().unwrap(); + // do stateless retry if the client didn't send a token + if token.is_empty() { + trace!( + "connection {:?} stateless retry as Quic header token is empty", + conn_id + ); + + let hdr = &dgram.header; + let new_token = mint_token(hdr, &dgram.recv_info.from); + let size = quiche::retry( + &hdr.scid, + &hdr.dcid, + conn_id, + &new_token, + hdr.version, + &mut out, + ) + .explain_err(ErrorType::HandshakeError, |_| { + "creating retry packet failed" + })?; + + send_dgram(conn_id, socket, &out[..size], dgram.recv_info.from) + .await + .explain_err(ErrorType::WriteError, |_| "sending retry packet failed")?; + + // validate response + if let Some(resp_dgram) = udp_rx.recv().await { + // token is always present in "Initial" packets + let resp_token = resp_dgram.header.token.as_ref().unwrap(); + if resp_token.is_empty() { + return Err(Error::explain( + ErrorType::HandshakeError, + "Stateless retry failed. Still no token available after stateless retry." 
+ .to_string(), + )); + } else { + *dgram = resp_dgram; + }; + } else { + return Err(Error::explain( + ErrorType::HandshakeError, + "Stateless retry did not receive a response.".to_string(), + )); + } + } + + let hdr = &dgram.header; + let token = hdr.token.as_ref().unwrap(); + let odcid = validate_token(&dgram.recv_info.from, token); + + // The token was not valid, meaning the retry failed, so drop the connection. + if odcid.is_none() { + return Err(Error::explain( + ErrorType::HandshakeError, + "Quic header has invalid address validation token.".to_string(), + )); + } + + // The destination id was not valid, so drop the connection. + if conn_id.len() != hdr.dcid.len() { + return Err(Error::explain( + ErrorType::HandshakeError, + "Quic header has invalid destination connection id.".to_string(), + )); + } + + // Reuse the source connection ID we sent in the Retry packet, + // instead of changing it again. + debug!( + "new connection {:?} odcid={:?} scid={:?} ", + hdr.dcid, initial_dcid, hdr.scid + ); + + let mut conn; + { + let mut config = configs.quic().lock(); + conn = quiche::accept( + &hdr.dcid, + Some(&initial_dcid), + dgram.recv_info.to, + dgram.recv_info.from, + &mut config, + ) + .explain_err(ErrorType::HandshakeError, |_| { + "connection instantiation failed" + })?; + } + + // receive quic data into connection + let buf = dgram.pkt.as_mut_slice(); + conn.recv(buf, dgram.recv_info) + .explain_err(ErrorType::HandshakeError, |_| { + "receiving initial data failed" + })?; + + debug!("connection {:?} starting handshake", conn_id); + // RSA handshake requires more than one packet + while !conn.is_established() { + trace!("connection {:?} creating handshake packet", conn_id); + 'tx: loop { + let (size, info) = match conn.send(out.as_mut_slice()) { + Ok((size, info)) => (size, info), + Err(quiche::Error::Done) => break 'tx, + Err(e) => { + return Err(e).explain_err(ErrorType::WriteError, |_| { + "creating handshake packet failed" + }) + } + }; + + 
trace!("connection {:?} sending handshake packet", conn_id); + send_dgram(conn_id, socket, &out[..size], info.to) + .await + .explain_err(ErrorType::WriteError, |_| "sending handshake packet failed")?; + } + + trace!("connection {:?} waiting for handshake response", conn_id); + 'rx: loop { + if let Some(mut dgram) = udp_rx.recv().await { + trace!("connection {:?} received handshake response", conn_id); + conn.recv(dgram.pkt.as_mut_slice(), dgram.recv_info) + .explain_err(ErrorType::HandshakeError, |_| { + "receiving handshake response failed" + })?; + } else { + return Err(Error::explain( + ErrorType::HandshakeError, + "finishing handshake failed, did not receive a response", + )); + } + if udp_rx.is_empty() { + break 'rx; + } + } + + trace!("connection {:?} established={}, early_data={}, closed={}, draining={}, readable={}, timed_out={}, resumed={}", + conn_id, conn.is_established(), conn.is_in_early_data(), conn.is_closed(), + conn.is_draining(), conn.is_readable(), conn.is_timed_out(), conn.is_resumed()); + + trace!( + "connection {:?} peer_error={:?}, local_error={:?}", + conn_id, + conn.peer_error(), + conn.local_error() + ); + + handle_connection_errors(conn_id.clone(), conn.peer_error(), conn.local_error())?; + } + + let max_send_udp_payload_size = conn.max_send_udp_payload_size(); + let connection_id = conn_id; + let connection = Arc::new(Mutex::new(conn)); + let tx_notify = Arc::new(Notify::new()); + let rx_notify = Arc::new(Notify::new()); + + debug!( + "connection {:?} handshake successful, udp_rx {}", + connection_id, + udp_rx.len() + ); + let handle = EstablishedHandle { + connection_id: connection_id.clone(), + connection: connection.clone(), + rx_notify: rx_notify.clone(), + tx_notify: tx_notify.clone(), + }; + + { + let mut resp = response.lock(); + *resp = Some(HandshakeResponse::Established(handle)); + } + + let tx = ConnectionTx { + socket_details: socket_details.clone(), + connection_id: connection_id.clone(), + connection: connection.clone(), + 
+ tx_notify: tx_notify.clone(), + tx_stats: TxStats::new(max_send_udp_payload_size), + }; + + let e_state = IncomingEstablishedState { + connection_id: connection_id.clone(), + connection: connection.clone(), + + http3_config: configs.http3().clone(), + + rx_notify: rx_notify.clone(), + tx_notify: tx_notify.clone(), + + tx_handle: tokio::spawn(tx.start()), + drop_connection: drop_connection.clone(), + socket: socket.clone(), + }; + + // send HANDSHAKE_DONE Quic frame on established connection + e_state.tx_notify.notify_waiters(); + Ok(Some(e_state)) +} + +// connection io tx directly via socket +async fn send_dgram( + id: &ConnectionId<'_>, + io: &Arc, + buf: &[u8], + to: SocketAddr, +) -> pingora_error::Result { + match io.send_to(buf, &to).await { + Ok(sent) => { + debug_assert_eq!( + sent, + buf.len(), + "amount of network sent data does not correspond to packet size" + ); + trace!( + "connection {:?} sent dgram to={:?} length={:?} ", + id, + to, + buf.len() + ); + Ok(sent) + } + Err(e) => { + error!("Failed sending packet via UDP. Error: {:?}", e); + Err(Error::explain( + ErrorType::WriteError, + format!("Failed sending packet via UDP. 
Error: {:?}", e), + )) + } + } +} diff --git a/pingora-core/src/upstreams/peer.rs b/pingora-core/src/upstreams/peer.rs index 1210e5428..ced9ecdaf 100644 --- a/pingora-core/src/upstreams/peer.rs +++ b/pingora-core/src/upstreams/peer.rs @@ -595,6 +595,16 @@ impl Peer for HttpPeer { fn get_tracer(&self) -> Option { self.options.tracer.clone() } + + fn ip_proto(&self) -> IpProto { + if let Some(peer_options) = self.get_peer_options() { + match peer_options.alpn { + ALPN::H3 => return IpProto::UDP, + _ => {} + } + } + IpProto::TCP + } } /// The proxy settings to connect to the remote server, CONNECT only for now diff --git a/pingora-core/tests/test_basic.rs b/pingora-core/tests/test_basic.rs index aef80ffe8..ff71030c5 100644 --- a/pingora-core/tests/test_basic.rs +++ b/pingora-core/tests/test_basic.rs @@ -30,6 +30,7 @@ use h3i::config::Config; use h3i::frame::H3iFrame; use h3i::quiche::h3::frame::Frame; use h3i::quiche::h3::Header; +use pingora_core::prelude::HttpPeer; #[tokio::test] async fn test_http() { @@ -76,7 +77,7 @@ async fn test_uds() { } #[tokio::test] -async fn test_quic_http3() -> Result<()> { +async fn test_listener_quic_http3() -> Result<()> { utils::init(); info!("Startup completed.."); @@ -158,7 +159,7 @@ async fn test_quic_http3() -> Result<()> { } #[tokio::test] -async fn test_quic_http3_timeout() -> Result<()> { +async fn test_listener_quic_http3_timeout() -> Result<()> { utils::init(); info!("Startup completed.."); From 5584baf2f23da634bbbcb718edc062f495c6fbdf Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Wed, 22 Jan 2025 19:12:51 +0100 Subject: [PATCH 30/52] initial Connector HTTP3 integration, successful request --- pingora-core/src/connectors/http/mod.rs | 52 ++ pingora-core/src/connectors/http/v3.rs | 323 ++++++++++-- pingora-core/src/connectors/l4.rs | 103 +--- pingora-core/src/connectors/mod.rs | 97 +++- pingora-core/src/protocols/http/client.rs | 4 +- pingora-core/src/protocols/http/v3/client.rs | 481 ++++++++++++++++-- 
pingora-core/src/protocols/http/v3/mod.rs | 173 ++++++- pingora-core/src/protocols/http/v3/server.rs | 165 ++---- .../src/protocols/l4/quic/connector.rs | 6 +- pingora-core/src/protocols/l4/quic/mod.rs | 27 + pingora-core/src/protocols/l4/stream.rs | 11 +- pingora-core/src/protocols/tls/quic/client.rs | 33 +- pingora-core/src/upstreams/peer.rs | 7 + 13 files changed, 1156 insertions(+), 326 deletions(-) diff --git a/pingora-core/src/connectors/http/mod.rs b/pingora-core/src/connectors/http/mod.rs index 7edfddb31..450e0b18a 100644 --- a/pingora-core/src/connectors/http/mod.rs +++ b/pingora-core/src/connectors/http/mod.rs @@ -14,11 +14,15 @@ //! Connecting to HTTP servers +use std::collections::HashMap; use crate::connectors::ConnectorOptions; use crate::protocols::http::client::HttpSession; use crate::upstreams::peer::Peer; use pingora_error::Result; use std::time::Duration; +use parking_lot::RwLock; +use pingora_pool::PoolNode; +use crate::protocols::{UniqueID, UniqueIDType}; pub mod v1; pub mod v2; @@ -104,6 +108,54 @@ impl Connector { } } + +// TODO: also use in v2, currently only used in v3 +pub(crate) struct InUsePool { + // TODO: use pingora hashmap to shard the lock contention + pools: RwLock>>, +} + +impl InUsePool { + pub(crate) fn new() -> Self { + InUsePool { + pools: RwLock::new(HashMap::new()), + } + } + pub(crate) fn insert(&self, reuse_hash: u64, conn: T) { + { + let pools = self.pools.read(); + if let Some(pool) = pools.get(&reuse_hash) { + pool.insert(conn.id(), conn); + return; + } + } // drop read lock + + let pool = PoolNode::new(); + pool.insert(conn.id(), conn); + let mut pools = self.pools.write(); + pools.insert(reuse_hash, pool); + } + + // retrieve a h2 conn ref to create a new stream + // the caller should return the conn ref to this pool if there are still + // capacity left for more streams + pub(crate) fn get(&self, reuse_hash: u64) -> Option { + let pools = self.pools.read(); + pools.get(&reuse_hash)?.get_any().map(|v| v.1) + } + + // 
release a h2_stream, this functional will cause an ConnectionRef to be returned (if exist) + // the caller should update the ref and then decide where to put it (in use pool or idle) + pub(crate) fn release(&self, reuse_hash: u64, id: UniqueIDType) -> Option { + let pools = self.pools.read(); + if let Some(pool) = pools.get(&reuse_hash) { + pool.remove(id) + } else { + None + } + } +} + #[cfg(test)] #[cfg(feature = "any_tls")] mod tests { diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index 0682a761e..50cef89bc 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -1,15 +1,103 @@ -// FIXME: implement request spawning -// ConnectorOptions contains CA file path from ServerConfig +use super::HttpSession; -use crate::connectors::http::v2::{ConnectionRef, InUsePool}; +use crate::connectors::http::InUsePool; use crate::connectors::{ConnectorOptions, TransportConnector}; -use crate::protocols::http::v2::client::Http2Session; -use crate::protocols::http::v3::client::Http3Session; -use crate::protocols::l4::quic::Crypto; -use crate::upstreams::peer::Peer; +use crate::protocols::http::v3::client::{Http3Poll, Http3Session}; +use crate::protocols::http::v3::nohash::StreamIdHashMap; +use crate::protocols::http::v3::H3_SESSION_EVENTS_CHANNEL_SIZE; +use crate::protocols::l4::quic::{Connection, Crypto}; +use crate::protocols::{Digest, Stream, UniqueID, UniqueIDType}; +use crate::upstreams::peer::{Peer, ALPN}; +use log::{debug, error}; +use parking_lot::Mutex; +use pingora_error::ErrorType::{H2Error, HandshakeError, InternalError}; +use pingora_error::{Error, ErrorType, OrErr, Result}; use pingora_pool::ConnectionPool; +use quiche::h3::Event; +use quiche::ConnectionId; +use std::collections::VecDeque; +use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; use std::sync::Arc; use std::time::Duration; +use tokio::sync::mpsc::Sender; +use tokio::sync::Notify; +use tokio::sync::mpsc; +use 
tokio::task::JoinHandle; +use pingora_runtime::current_handle; +// FIXME: ConnectorOptions contains CA file path from ServerConfig + +pub(crate) struct ConnectionRef(Arc); + +impl ConnectionRef { + pub fn new(l4_stream: Stream, digest: Digest, conn_io: ConnectionIo, + add_sessions: Arc)>>>, + drop_sessions: Arc>>, + max_streams: usize, h3poll_task: JoinHandle>, ) -> Self { + + Self(Arc::new(ConnectionRefInner { + l4_stream, + digest, + conn_io, + max_streams, + current_streams: AtomicUsize::new(0), + max_initiated_stream_id: AtomicU64::new(0), + add_sessions, + drop_sessions, + h3poll_task + })) + } +} + +pub(crate) struct ConnectionRefInner { + // avoid dropping stream, & used for UniqueIDType + l4_stream: Stream, + + digest: Digest, + conn_io: ConnectionIo, + + // max concurrent streams this connection is allowed to create + max_streams: usize, + + // how many concurrent streams already active + current_streams: AtomicUsize, + + // last initiated stream_id + max_initiated_stream_id: AtomicU64, + + // to remove sessions from the H3Poll tasks + add_sessions: Arc)>>>, + // to remove sessions from the H3Poll tasks + drop_sessions: Arc>>, + + h3poll_task: JoinHandle> +} + +impl Drop for ConnectionRefInner { + fn drop(&mut self) { + if !self.h3poll_task.is_finished() { + self.h3poll_task.abort(); + debug!("connection {:?} stopped H3Poll task", self.conn_io.conn_id) + } + } +} + +impl UniqueID for ConnectionRef { + fn id(&self) -> UniqueIDType { + self.0.l4_stream.id() + } +} + +#[derive(Clone)] +pub(crate) struct ConnectionIo { + pub(crate) conn_id: ConnectionId<'static>, + + pub(crate) quic: Arc>, + pub(crate) http3: Arc>, + + pub(crate) rx_notify: Arc, + pub(crate) tx_notify: Arc, +} + /// Http3 connector pub struct Connector { @@ -19,7 +107,7 @@ pub struct Connector { //idle_pool: Arc>, // the pool of h2 connections that have ongoing streams //in_use_pool: crate::connectors::http::v2::InUsePool, - in_use_pool: InUsePool, + in_use_pool: InUsePool, // the h3 
connection idle pool idle_pool: Arc>, crypto: Option, @@ -48,7 +136,7 @@ impl Connector { pub async fn reused_http_session( &self, peer: &P, - ) -> pingora_error::Result> { + ) -> Result> { // check in use pool first so that we use fewer total connections // then idle pool let reuse_hash = peer.reuse_hash(); @@ -69,12 +157,11 @@ impl Connector { .get(reuse_hash) .or_else(|| self.idle_pool.get(&reuse_hash)); if let Some(conn) = maybe_conn { - // FIXME: fix types, ConnectionRef = H2 only - let h2_stream = conn.spawn_stream().await?; + let h3_stream = conn.spawn_stream().await?; if conn.more_streams_allowed() { self.in_use_pool.insert(reuse_hash, conn); } - Ok(h2_stream) + Ok(h3_stream) } else { Ok(None) } @@ -85,7 +172,7 @@ impl Connector { /// This function will terminate the [Http3Session]. The corresponding h3 connection will now /// have one more free stream to use. /// - /// The h2 connection will be closed after `idle_timeout` if it has no active streams. + /// The h3 connection will be closed after `idle_timeout` if it has no active streams. 
pub fn release_http_session( &self, session: Http3Session, @@ -94,45 +181,181 @@ impl Connector { ) { todo!() } - /* - /// Create a new Http3 connection to the given server - pub async fn new_http_session( - &self, - peer: &P, - ) -> Result { - let stream = self.transport.new_stream(peer).await?; - - // check alpn - match stream.selected_alpn_proto() { - Some(crate::protocols::tls::ALPN) => { /* continue */ } - Some(_) => { - // H2 not supported - return Ok(crate::protocols::http::client::HttpSession(Http1Session::new(stream))); - } - None => { - // if tls but no ALPN, default to h1 - // else if plaintext and min http version is 1, this is most likely h1 - if peer.tls() - || peer - .get_peer_options() - .map_or(true, |o| o.alpn.get_min_http_version() == 1) - { - return Ok(HttpSession::H1(Http1Session::new(stream))); - } - // else: min http version=H2 over plaintext, there is no ALPN anyways, we trust - // the caller that the server speaks h2c + + /// Create a new Http3 connection to the given server + pub async fn new_http_session( + &self, + peer: &P, + ) -> Result { + let mut stream = self.transport.new_stream(peer).await?; + error!("{:?}", stream.is_quic_connection()); + if let Some(qconn) = stream.quic_connection_state() { + match qconn { + Connection::IncomingHandshake(_) => {} + Connection::IncomingEstablished(_) => {} + Connection::OutgoingHandshake(_) => {} + Connection::OutgoingEstablished(e) => { + error!("established {:?}", qconn); } } - let max_h2_stream = peer.get_peer_options().map_or(1, |o| o.max_h2_streams); - let conn = handshake(stream, max_h2_stream, peer.h2_ping_interval()).await?; - let h2_stream = conn - .spawn_stream() - .await? 
- .expect("newly created connections should have at least one free stream"); - if conn.more_streams_allowed() { - self.in_use_pool.insert(peer.reuse_hash(), conn); + } + error!("{:?}", stream.selected_alpn_proto()); + // TODO: verify & check how this can fit into TCP/UDP picture + // check alpn + match stream.selected_alpn_proto() { + Some(ALPN::H3) => { /* continue */ } + _ => { + // FIXME: correctly route ALPNs + return Err(Error::explain(ErrorType::InternalError, "alpn does not match h3")) } - Ok(HttpSession::H2(h2_stream)) } - */ + + let max_h3_stream = peer.get_peer_options().map_or(1, |o| o.max_h3_streams); + let conn = handshake(stream, max_h3_stream).await?; + + let h3_stream = conn + .spawn_stream() + .await? + .expect("newly created connections should have at least one free stream"); + + if conn.more_streams_allowed() { + self.in_use_pool.insert(peer.reuse_hash(), conn); + } + + Ok(HttpSession::H3(h3_stream)) + } +} + +impl ConnectionRef { + // spawn a stream if more stream is allowed, otherwise return Ok(None) + pub async fn spawn_stream(&self) -> Result> { + // Atomically check if the current_stream is over the limit + // load(), compare and then fetch_add() cannot guarantee the same + let current_streams = self.0.current_streams.fetch_add(1, Ordering::SeqCst); + if current_streams >= self.0.max_streams { + // already over the limit, reset the counter to the previous value + self.0.current_streams.fetch_sub(1, Ordering::SeqCst); + return Ok(None); + } + + let h3_session = Http3Session::new( + self.0.conn_io.clone(), + self.0.add_sessions.clone(), + self.0.drop_sessions.clone())?; + + Ok(Some(h3_session)) + } + + pub fn more_streams_allowed(&self) -> bool { + let qconn = self.0.conn_io.quic.lock(); + qconn.is_established() && + !qconn.is_closed() && + !qconn.is_draining() && + qconn.peer_streams_left_bidi() > 0 + } +} + +async fn handshake( + mut stream: Stream, + max_streams: usize +) -> Result { + // Safe guard: new_http_session() assumes there should 
be at least one free stream + if max_streams == 0 { + return Error::e_explain(H2Error, "zero max_stream configured"); + } + + let unique_id = stream.id(); + let digest = Digest { + // NOTE: this field is always false because the digest is shared across all streams + // The streams should log their own reuse info + ssl_digest: stream.get_ssl_digest(), + // TODO: log h3 handshake time + timing_digest: stream.get_timing_digest(), + proxy_digest: stream.get_proxy_digest(), + socket_digest: stream.get_socket_digest(), + }; + let Some(quic_state) = stream.quic_connection_state() else { + return Err(Error::explain(InternalError, "stream is not a Quic stream")) + }; + + let conn_io = match quic_state { + Connection::IncomingHandshake(_) | + Connection::IncomingEstablished(_) | + Connection::OutgoingHandshake(_) => { + return Err(Error::explain(InternalError, "invalid Quic stream state")) + } + Connection::OutgoingEstablished(e_state) => { + let hconn = { + let mut conn = e_state.connection.lock(); + quiche::h3::Connection::with_transport(&mut conn, &e_state.http3_config) + .explain_err(HandshakeError, |_| "during H3 handshake") + }?; + + ConnectionIo { + conn_id: e_state.connection_id.clone(), + quic: e_state.connection.clone(), + http3: Arc::new(Mutex::new(hconn)), + rx_notify: e_state.rx_notify.clone(), + tx_notify: e_state.tx_notify.clone(), + } + } + }; + debug!("H3 handshake to server done."); + + + let add_sessions = Arc::new(Mutex::new(VecDeque::default())); + let drop_sessions = Arc::new(Mutex::new(VecDeque::default())); + + let h3poll = Http3Poll { + conn_io: conn_io.clone(), + sessions: Default::default(), + add_sessions: add_sessions.clone(), + drop_sessions: drop_sessions.clone(), + }; + let h3poll_task = current_handle().spawn(h3poll.start()); + + Ok(ConnectionRef::new( + stream, + digest, + conn_io, + add_sessions, + drop_sessions, + max_streams, + h3poll_task + )) } + +#[cfg(test)] +mod quic_tests { + use http::Version; + use 
crate::connectors::quic_tests::quic_listener_peer; + use pingora_error::Result; + use pingora_http::RequestHeader; + use super::*; + + #[tokio::test] + async fn test_connector_quic_http3() -> Result<()> { + let (_server_handle, peer) = quic_listener_peer()?; + + + let connector = Connector::new(None); + let mut session = connector.new_http_session(&peer).await?; + + let mut req = RequestHeader::build("GET", b"/", Some(3))?; + req.insert_header(http::header::HOST, "openresty.org")?; + + session.write_request_header(Box::new(req)).await?; + session.finish_request_body().await?; + session.read_response_header().await?; + + let resp = session.response_header(); + + assert!(resp.is_some()); + if let Some(resp) = resp { + assert_eq!(resp.status.as_str(), "200"); + assert_eq!(resp.version, Version::HTTP_3); + } + + Ok(()) + } +} \ No newline at end of file diff --git a/pingora-core/src/connectors/l4.rs b/pingora-core/src/connectors/l4.rs index 70fac0b96..144acfc11 100644 --- a/pingora-core/src/connectors/l4.rs +++ b/pingora-core/src/connectors/l4.rs @@ -147,8 +147,13 @@ where } }?; + let mut quic_http3_config = None; + if let Some(peer_options) = peer.get_peer_options() { + quic_http3_config = peer_options.quic_http3_config.clone() + }; + // FIXME: supply configs & default configs - Connection::initiate(socket, None)?.into() + Connection::initiate(socket, quic_http3_config)?.into() } SocketAddr::Unix(_addr) => { // TODO: tokio::net::UnixDatagram support could be an option @@ -642,108 +647,26 @@ mod tests { #[cfg(test)] mod quic_tests { - use crate::apps::http_app::ServeHttp; use crate::connectors::l4::connect; - use crate::listeners::{Listeners, ALPN}; - use crate::prelude::HttpPeer; - use crate::protocols::http::ServerSession; - use crate::protocols::l4::quic::{Connection, QuicHttp3Configs, MAX_IPV6_BUF_SIZE}; + use crate::protocols::l4::quic::Connection; use crate::protocols::ConnectionState; - use crate::server::Server; - use crate::services::listening::Service; - use 
async_trait::async_trait; - use bytes::{BufMut, BytesMut}; - use http::{Response, StatusCode}; - use log::{debug, info}; - use pingora_error::Result; - use pingora_timeout::timeout; - use std::thread; - use std::time::Duration; use crate::connectors::{do_connect, tls}; - - fn quic_listener() { - env_logger::builder() - .format_timestamp(Some(env_logger::TimestampPrecision::Nanos)) - .init(); - - let cert_path = format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR")); - let key_path = format!("{}/tests/keys/key.pem", env!("CARGO_MANIFEST_DIR")); - - let mut my_server = Server::new(None).unwrap(); - my_server.bootstrap(); - - let configs = QuicHttp3Configs::from_cert_key_paths(&cert_path, &key_path).unwrap(); - let listeners = Listeners::quic("0.0.0.0:6147", configs).unwrap(); - - let mut echo_service_http = - Service::with_listeners("Echo Service HTTP".to_string(), listeners, EchoApp); - echo_service_http.threads = Some(4); - - my_server.add_service(echo_service_http); - my_server.run_forever(); - } + use crate::connectors::quic_tests::quic_listener_peer; + use pingora_error::Result; #[tokio::test] - async fn test_connector_quic_http3() -> Result<()> { - let _server_handle = thread::spawn(|| { - quic_listener(); - }); - info!("Startup completed.."); + async fn test_connector_quic_handshake() -> Result<()> { + let (_server_handle, peer) = quic_listener_peer()?; - let port = "6147"; - let mut peer = Box::new(HttpPeer::new( - format!("127.0.0.1:{port}"), - false, - "openrusty.org".to_string(), - )); - peer.options.alpn = ALPN::H3; - - let mut pre_handshake_stream = connect(&*peer, None).await?; + let mut pre_handshake_stream = connect(&peer, None).await?; assert!(pre_handshake_stream.quic_connection_state().is_some()); let tls_connector = tls::Connector::new(None); - let mut stream = do_connect(&*peer, None, None, &tls_connector.ctx).await?; + let mut stream = do_connect(&peer, None, None, &tls_connector.ctx).await?; 
assert!(stream.quic_connection_state().is_some()); let connection = stream.quic_connection_state().unwrap(); assert!(matches!(connection, Connection::OutgoingEstablished(_))); Ok(()) } - - #[derive(Clone)] - pub struct EchoApp; - #[async_trait] - impl ServeHttp for EchoApp { - async fn response(&self, http_stream: &mut ServerSession) -> Response> { - // read timeout of 2s - let read_timeout = 2000; - let body_future = async { - let mut body = BytesMut::with_capacity(MAX_IPV6_BUF_SIZE); - while let Ok(b) = http_stream.read_request_body().await { - match b { - None => break, // finished reading request - Some(b) => body.put(b), - } - } - if body.is_empty() { - body.put("no body!".as_bytes()); - } - body.freeze() - }; - - let body = match timeout(Duration::from_millis(read_timeout), body_future).await { - Ok(res) => res, - Err(_) => { - panic!("Timed out after {:?}ms", read_timeout); - } - }; - - Response::builder() - .status(StatusCode::OK) - .header(http::header::CONTENT_TYPE, "text/html") - .header(http::header::CONTENT_LENGTH, body.len()) - .body(body.to_vec()) - .unwrap() - } - } } diff --git a/pingora-core/src/connectors/mod.rs b/pingora-core/src/connectors/mod.rs index b35427788..a9137ca81 100644 --- a/pingora-core/src/connectors/mod.rs +++ b/pingora-core/src/connectors/mod.rs @@ -185,7 +185,6 @@ impl TransportConnector { do_connect(peer, bind_to, alpn_override, &self.tls_ctx.ctx).await? 
}; - // FIXME: here stream should be Connection::Established Ok(stream) } @@ -553,3 +552,99 @@ mod tests { assert!(etype != ConnectTimedout || !context.contains("total-connection timeout")); } } + +#[cfg(test)] +pub(crate) mod quic_tests { + use std::thread; + use std::thread::JoinHandle; + use crate::apps::http_app::ServeHttp; + use crate::listeners::{Listeners, ALPN}; + use crate::protocols::http::ServerSession; + use crate::protocols::l4::quic::{QuicHttp3Configs, MAX_IPV6_BUF_SIZE}; + use crate::server::Server; + use crate::services::listening::Service; + use async_trait::async_trait; + use bytes::{BufMut, BytesMut}; + use http::{Response, StatusCode}; + use std::time::Duration; + use log::info; + use pingora_timeout::timeout; + use pingora_error::Result; + use crate::prelude::HttpPeer; + + pub(crate) fn quic_listener_peer() -> Result<(JoinHandle<()>, HttpPeer)> { + let port = 6147u16; + fn inner(port: u16) { + env_logger::builder() + .format_timestamp(Some(env_logger::TimestampPrecision::Nanos)) + .init(); + + let cert_path = format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR")); + let key_path = format!("{}/tests/keys/key.pem", env!("CARGO_MANIFEST_DIR")); + + let mut my_server = Server::new(None).unwrap(); + my_server.bootstrap(); + + let configs = QuicHttp3Configs::from_cert_key_paths(&cert_path, &key_path).unwrap(); + let listeners = Listeners::quic(format!("0.0.0.0:{port}").as_str(), configs).unwrap(); + + let mut echo_service_http = + Service::with_listeners("Echo Service HTTP".to_string(), listeners, EchoApp); + echo_service_http.threads = Some(4); + + my_server.add_service(echo_service_http); + my_server.run_forever(); + } + + let server_handle = thread::spawn(move || { + inner(port); + }); + + let mut peer = HttpPeer::new( + format!("127.0.0.1:{port}"), + false, + "openrusty.org".to_string(), + ); + peer.options.alpn = ALPN::H3; + + info!("Startup completed.."); + Ok((server_handle, peer)) + } + + #[derive(Clone)] + pub(crate) struct EchoApp; 
+ #[async_trait] + impl ServeHttp for EchoApp { + async fn response(&self, http_stream: &mut ServerSession) -> Response> { + // read timeout of 2s + let read_timeout = 2000; + let body_future = async { + let mut body = BytesMut::with_capacity(MAX_IPV6_BUF_SIZE); + while let Ok(b) = http_stream.read_request_body().await { + match b { + None => break, // finished reading request + Some(b) => body.put(b), + } + } + if body.is_empty() { + body.put("no body!".as_bytes()); + } + body.freeze() + }; + + let body = match timeout(Duration::from_millis(read_timeout), body_future).await { + Ok(res) => res, + Err(_) => { + panic!("Timed out after {:?}ms", read_timeout); + } + }; + + Response::builder() + .status(StatusCode::OK) + .header(http::header::CONTENT_TYPE, "text/html") + .header(http::header::CONTENT_LENGTH, body.len()) + .body(body.to_vec()) + .unwrap() + } + } +} \ No newline at end of file diff --git a/pingora-core/src/protocols/http/client.rs b/pingora-core/src/protocols/http/client.rs index e3c28765f..657694ad0 100644 --- a/pingora-core/src/protocols/http/client.rs +++ b/pingora-core/src/protocols/http/client.rs @@ -61,7 +61,7 @@ impl HttpSession { Ok(()) } HttpSession::H2(h2) => h2.write_request_header(req, false), - HttpSession::H3(h3) => h3.write_request_header(req, false), + HttpSession::H3(h3) => h3.write_request_header(req).await, } } @@ -74,7 +74,7 @@ impl HttpSession { Ok(()) } HttpSession::H2(h2) => h2.write_request_body(data, end), - HttpSession::H3(h3) => h3.write_request_body(data, end), + HttpSession::H3(h3) => h3.write_request_body(data, end).await, } } diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs index 45bcdb851..d9a123363 100644 --- a/pingora-core/src/protocols/http/v3/client.rs +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -1,85 +1,382 @@ -use crate::connectors::http::v2::ConnectionRef; +use std::cmp; +use std::collections::VecDeque; +use std::fmt::Debug; +use std::sync::Arc; +use 
crate::connectors::http::v3::{ConnectionIo, ConnectionRef}; use crate::protocols::l4::socket::SocketAddr; use crate::protocols::{Digest, UniqueIDType}; -use bytes::Bytes; -use h2::client::SendRequest; +use bytes::{BufMut, Bytes, BytesMut}; use h2::SendStream; use http::HeaderMap; use pingora_http::{RequestHeader, ResponseHeader}; use std::time::Duration; +use log::{debug, error, trace, warn}; +use parking_lot::Mutex; +use quiche::{h3, ConnectionId}; +use quiche::h3::{Event, Header, NameValue}; +use tokio::sync::mpsc; +use tokio::sync::mpsc::{Receiver, Sender}; +use pingora_error::{Error, ErrorType, OrErr, Result}; +use pingora_error::ErrorType::{H3Error, InternalError, InvalidHTTPHeader, ReadError, WriteError}; +use crate::protocols::http::v3::{data_finished_event, event_to_response_headers, header_size, headervec_to_headermap, request_headers_to_event, stream_capacity, H3_SESSION_EVENTS_CHANNEL_SIZE}; +use crate::protocols::http::v3::nohash::StreamIdHashMap; +use crate::protocols::l4::quic::MAX_IPV6_QUIC_DATAGRAM_SIZE; -// FIXME: implement client H3Session pub struct Http3Session { + conn_io: ConnectionIo, + stream_id: Option, + + /// The read timeout, which will be applied to both reading the header and the body. + /// The timeout is reset on every read. This is not a timeout on the overall duration of the + /// response. 
+ // FIXME: race with timeout if present pub read_timeout: Option, - send_req: SendRequest, - conn: ConnectionRef, + + // HTTP3 event channel for this stream_id + event_rx: Option>, + + // sent request + request_header_written: Option>, + // received response + response_header: Option, + + // sent body bytes + body_sent: usize, + // send is finished (Quic finished frame sent) + send_ended: bool, + + // body bytes read + body_read: usize, + // read is finished (Quic finished frame received) + read_ended: bool, + + // remove session from active sessions + drop_sessions: Arc>>, + // add session to active sessions + add_sessions: Arc)>>> +} + +impl Drop for Http3Session { + fn drop(&mut self) { + if let Some(stream_id) = self.stream_id { + { + let mut sessions = self.drop_sessions.lock(); + sessions.push_back(stream_id); + } + debug!("connection {:?} dropping session with stream id {}", + self.conn_io.conn_id, stream_id) + } + } } impl Http3Session { - pub(crate) fn new(send_req: SendRequest, conn: ConnectionRef) -> Self { - Self { + pub(crate) fn new(conn_io: ConnectionIo, + add_sessions: Arc)>>>, + drop_sessions: Arc>>) + -> Result { + Ok(Self { + conn_io, + stream_id: None, read_timeout: None, - send_req, - conn, - } + event_rx: None, + request_header_written: None, + response_header: None, + body_sent: 0, + send_ended: false, + body_read: 0, + read_ended: false, + add_sessions, + drop_sessions, + }) } /// Write the request header to the server - pub fn write_request_header( + pub async fn write_request_header( &mut self, - mut req: Box, - end: bool, - ) -> pingora_error::Result<()> { - todo!() + req: Box + ) -> Result<()> { + if self.request_header_written.is_some() { + // cannot send again + warn!("request not sent as session already sent a request"); + return Ok(()); + } + + let headers = request_headers_to_event(&req)?; + let stream_id = self.send_request(&headers, false).await?; + error!("stream_id {}", stream_id); + + self.request_header_written = Some(req); + 
Ok(()) } + async fn send_request(&mut self, headers: &[T], fin: bool) -> Result { + // sending the request creates the underlying quic stream & according stream id + // it is not possible to check the stream capacity before sending the request + let stream_id = { + let mut qconn = self.conn_io.quic.lock(); + let mut hconn = self.conn_io.http3.lock(); + + hconn.send_request(&mut qconn, headers, fin) + .explain_err(WriteError, |_| "failed to send http3 request headers")? + }; + + let (tx, rx) = mpsc::channel::(H3_SESSION_EVENTS_CHANNEL_SIZE); + self.stream_id = Some(stream_id); + self.event_rx = Some(rx); + + { + let mut add_sessions = self.add_sessions.lock(); + add_sessions.push_back((stream_id, tx)) + } + + Ok(stream_id) + } + + // TODO: potentially refactor/unify with server side + /// Write a request body chunk - pub fn write_request_body(&mut self, data: Bytes, end: bool) -> pingora_error::Result<()> { - todo!() + pub async fn write_request_body(&mut self, data: Bytes, end: bool) -> Result<()> { + if self.send_ended { + // NOTE: within http3 content-length tracking is not available + warn!("Cannot write request body after stream ended. 
Dropping the extra data."); + return Ok(()); + } else if self.request_header_written.is_none() { + return Err(Error::explain( + H3Error, + "trying to send the request body before request header being sent", + )); + }; + let Some(stream_id) = self.stream_id else { + return Err(Error::explain(H3Error, "stream id not present")); + }; + + let mut sent_len = 0; + let mut fin = end; + while sent_len < data.len() { + let required = cmp::min(data.len() - sent_len, MAX_IPV6_QUIC_DATAGRAM_SIZE); + let capacity = stream_capacity(&self.conn_io.quic, stream_id, required, + &self.conn_io.rx_notify, &self.conn_io.tx_notify).await?; + + let send = if capacity > data.len() - sent_len { + &data[sent_len..data.len()] + } else { + &data[sent_len..sent_len + capacity] + }; + + fin = sent_len + send.len() == data.len() && end; + match self.send_body(send, fin) { + Ok(sent_size) => { + debug_assert_eq!(sent_size, send.len()); + sent_len += sent_size; + } + Err(e) => { + return Err(e).explain_err(WriteError, |_| { + "writing h3 request body to downstream" + }) + } + } + } + debug_assert_eq!(fin, end); + debug_assert_eq!(sent_len, data.len()); + if end { + self.conn_io.tx_notify.notify_waiters(); + } + + self.body_sent += sent_len; + self.send_ended = self.send_ended || end; + Ok(()) + } + + + // TODO: potentially refactor/unify with server side + fn send_body(&self, body: &[u8], fin: bool) -> Result { + let mut qconn = self.conn_io.quic.lock(); + let mut hconn = self.conn_io.http3.lock(); + + hconn.send_body(&mut qconn, self.stream_id()?, body, fin) + .explain_err(WriteError, |e| format!("failed to send http3 request body {:?}", e)) } + // TODO: potentially refactor/unify with server side /// Signal that the request body has ended - pub fn finish_request_body(&mut self) -> pingora_error::Result<()> { - todo!() + pub fn finish_request_body(&mut self) -> Result<()> { + if self.send_ended { + // already ended the stream + return Ok(()); + } + + if self.request_header_written.is_some() { + // 
use an empty data frame to signal the end + self.send_body(&[], true).explain_err( + WriteError, + |e| format! {"Writing h3 request body finished to downstream failed. {e}"}, + )?; + self.conn_io.tx_notify.notify_waiters(); + self.send_ended = true; + } + // else: the response header is not sent, do nothing now. + + Ok(()) } /// Read the response header - pub async fn read_response_header(&mut self) -> pingora_error::Result<()> { - todo!() + pub async fn read_response_header(&mut self) -> Result<()> { + if self.response_header.is_some() { + // already received + return Ok(()) + }; + + let (headers, _) = headers_event(self.stream_id()?, self.event_rx()?).await?; + let map = event_to_response_headers(&headers)?; + + self.response_header = Some(map); + Ok(()) + } + + fn stream_id(&self) -> Result { + let Some(stream_id) = self.stream_id else { + return Err(Error::explain(H3Error, "stream id not present")); + }; + Ok(stream_id) + } + + fn event_rx(&mut self) -> Result<&mut Receiver> { + let Some(ref mut event_rx) = &mut self.event_rx else { + return Err(Error::explain(H3Error, "event rx not present")); + }; + Ok(event_rx) } + // TODO: potentially refactor/unify with server side /// Read the response body /// /// `None` means, no more body to read - pub async fn read_response_body(&mut self) -> pingora_error::Result> { - todo!() + pub async fn read_response_body(&mut self) -> Result> { + if self.read_ended { + return Ok(None); + } + + + let read_timeout = self.read_timeout.clone(); + tokio::select! { + res = data_finished_event(self.stream_id()?, self.event_rx()?) => { + self.read_ended = true; + res? 
+ }, + _timedout = async { + if let Some(read_timeout) = read_timeout { + tokio::time::sleep(read_timeout) + } else { + tokio::time::sleep(Duration::MAX) + } + } => { + return Err(Error::explain(ErrorType::ReadTimedout, "reading body timed out")) + } + } + + let mut buf = [0u8; MAX_IPV6_QUIC_DATAGRAM_SIZE]; + let size = match self.recv_body(self.stream_id()?, &mut buf) { + Ok(size) => size, + Err(h3::Error::Done) => { + trace!("recv_body done"); + return Ok(Some(BytesMut::with_capacity(0).into())); + } + Err(e) => { + return Err(Error::explain( + ReadError, + format!("reading body failed with {}", e), + )) + } + }; + + let mut data = BytesMut::with_capacity(size); + data.put_slice(&buf[..size]); + let data: Bytes = data.into(); + + self.body_read += size; + + trace!("ready body len={:?}", data.len()); + Ok(Some(data)) } + // TODO: potentially refactor/unify with server side + // TODO: check if result type can be changed (requires Error::Done not being used) + fn recv_body(&self, stream_id: u64, out: &mut [u8]) -> h3::Result { + let mut qconn = self.conn_io.quic.lock(); + let mut hconn = self.conn_io.http3.lock(); + debug!( + "H3 connection {:?} stream {} receiving body", + qconn.trace_id(), stream_id + ); + hconn.recv_body(&mut qconn, stream_id, out) + } + + /// Whether the response has ended pub fn response_finished(&self) -> bool { - todo!() + self.read_ended } /// Check whether stream finished with error. /// Like `response_finished`, but also attempts to poll the h2 stream for errors that may have /// caused the stream to terminate, and returns them as `H2Error`s. 
- pub fn check_response_end_or_error(&mut self) -> pingora_error::Result { + pub fn check_response_end_or_error(&mut self) -> Result { todo!() } /// Read the optional trailer headers - pub async fn read_trailers(&mut self) -> pingora_error::Result> { - todo!() + /// in case pre-conditions are not met, the call returns None + /// + /// requires that the request sent contains the TE header including the "trailers" keyword + /// for further details see RFC9110 Section 6.5.1 + /// + /// additionally the response headers need to contain the `trailers` header + pub async fn read_trailers(&mut self) -> Result> { + if !self.read_ended { + warn!("trying to read trailers before body finished"); + return Ok(None) + }; + + // RFC9110 Section 6.5.1 + // The presence of the keyword "trailers" in the TE header field (Section 10.1.4) of + // a request indicates that the client is willing to accept trailer fields, + // on behalf of itself and any downstream clients. + let mut client_accepts = false; + if let Some(headers) = &self.request_header_written { + if let Some(te_header) = headers.headers.get(http::header::TE) { + let te = te_header.to_str() + .explain_err(InvalidHTTPHeader, |_| "failed to parse TE header")?; + + client_accepts = te.contains("trailers") + } + }; + + let mut response_has_trailers = false; + if let Some(response) = &self.response_header { + response_has_trailers = response.headers.get(http::header::TRAILER).is_some() + }; + + if !(client_accepts && response_has_trailers) { + return Ok(None) + } + + // as per RFC9114/Section 4.1 it is an optional SINGLE header frame + // only possible when supported by the version of HTTP in use and enabled by an explicit + // framing mechanism + let (trailers, _) = headers_event(self.stream_id()?, self.event_rx()?).await?; + let trailer_map = headervec_to_headermap(&trailers)?; + + Ok(Some(trailer_map)) } /// The request header if it is already sent pub fn request_header(&self) -> Option<&RequestHeader> { - todo!() + 
self.request_header_written.as_deref() } /// The response header if it is already read pub fn response_header(&self) -> Option<&ResponseHeader> { - todo!() + self.response_header.as_ref() } /// Give up the http session abruptly. @@ -136,3 +433,127 @@ impl Http3Session { todo!() } } + +async fn headers_event(stream_id: u64, event_rx: &mut Receiver) -> Result<(Vec

, bool)> { + loop { + match event_rx.recv().await { + Some(ev) => { + trace!("stream {} event {:?}", stream_id, ev); + match ev { + Event::Finished => { + debug_assert!(false, "Finished event when Headers requested"); + } + Event::Headers { list, more_frames } => { + return Ok((list, more_frames)) + } + Event::Data => { + debug_assert!(false, "Data event when Headers requested"); + } + Event::Reset(error_code) => { + return Err(Error::explain( + H3Error, + format!("stream was reset with error code {}", error_code), + )) + } + Event::PriorityUpdate => { + debug_assert!(false, "PriorityUpdate event when Headers requested"); + warn!("received unhandled PriorityUpdate event"); + } + Event::GoAway => { + debug_assert!(false, "PriorityUpdate event when Headers requested"); + // RFC 9114 Section 5.2 & 7.2.6 + warn!("received unhandled GoAway event"); + } + } + } + None => { + return Err(Error::explain( + ReadError, + "H3 session event channel disconnected", + )) + } + } + } +} + +pub(crate) struct Http3Poll { + pub(crate) conn_io: ConnectionIo, + pub(crate) sessions: StreamIdHashMap>, + pub(crate) drop_sessions: Arc>>, + pub(crate) add_sessions: Arc)>>>, +} + +impl Http3Poll { + pub(crate) async fn start(mut self) -> Result<()> { +// let conn_id = self.conn_io.conn_id.clone(); + 'poll: loop { + let res = { + let mut qconn = self.conn_io.quic.lock(); + let mut hconn = self.conn_io.http3.lock(); + hconn.poll(&mut qconn) + }; + + let (stream_id, ev) = match res { + Ok((stream, ev)) => (stream, ev), + Err(e) => match e { + h3::Error::Done => { + self.sessions_housekeeping()?; + self.conn_io.rx_notify.notified().await; + continue 'poll + } + _ => { + break 'poll Err(e).explain_err( + H3Error, |_| format!("failed to poll h3 connection {:?}" , e)) + } + } + }; + + let session = if let Some(session) = self.sessions.get_mut(&stream_id) { + session + } else { + self.add_sessions()?; + let Some(session) = self.sessions.get_mut(&stream_id) else { + return Err(Error::explain( + 
InternalError, + format!("missing session channel for stream id {}", stream_id))) + }; + session + }; + + session.send(ev).await + .explain_err(H3Error, |_| "failed to forward h3 event to session")? + } + } + + fn sessions_housekeeping(&mut self) -> Result<()> { + self.drop_sessions()?; + self.add_sessions() + } + + fn add_sessions(&mut self) -> Result<()>{ + let mut add_sessions = self.add_sessions.lock(); + while let Some((stream_id, sender)) = add_sessions.pop_front() { + if let Some(_sender) = self.sessions.insert(stream_id, sender) { + debug_assert!(false, "stream id {} existed", stream_id); + return Err(Error::explain( + InternalError, format!("stream id {} was already present in sessions", stream_id))) + } else { + debug!("connection {:?} added stream id {} to sessions", self.conn_io.conn_id, stream_id) + } + } + Ok(()) + } + + fn drop_sessions(&mut self) -> Result<()>{ + let mut drop_sessions = self.drop_sessions.lock(); + while let Some(stream_id) = drop_sessions.pop_front() { + if let Some(_sender) = self.sessions.remove(&stream_id) { + debug!("connection {:?} removed stream id {} from sessions", self.conn_io.conn_id, stream_id) + } else { + return Err(Error::explain( + InternalError, format!("failed to remove session with stream id {}", stream_id))) + } + } + Ok(()) + } +} \ No newline at end of file diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index bdd7c9431..adb8c3f42 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -15,11 +15,23 @@ //! 
HTTP/3 implementation use http::{HeaderMap, HeaderName, HeaderValue, Request, Uri, Version}; -use log::warn; -use pingora_error::{ErrorType, OrErr, Result}; +use log::{error, trace, warn}; +use pingora_error::{Error, ErrorType, OrErr, Result}; use pingora_http::{RequestHeader, ResponseHeader}; -use quiche::h3::{Header, NameValue}; +use quiche::h3::{Event, Header, NameValue}; use std::fmt::Debug; +use std::future::Future; +use std::pin::Pin; +use http::uri::{Authority, Scheme}; +use parking_lot::Mutex; +use quiche::Connection; +use tokio::sync::mpsc::Receiver; +use tokio::sync::Notify; +use pingora_error::ErrorType::{H3Error, InvalidHTTPHeader, ReadError}; +use crate::protocols::http::HttpVersion; + +pub const H3_SESSION_EVENTS_CHANNEL_SIZE: usize = 256; +pub const H3_SESSION_DROP_DEQUE_INITIAL_CAPACITY: usize = 2048; pub mod client; pub mod nohash; @@ -69,6 +81,59 @@ fn response_headers_to_event(resp: &ResponseHeader) -> Vec
{ qheaders } +fn request_headers_to_event(req: &RequestHeader) -> Result> { + let mut qheaders: Vec
= Vec::with_capacity(req.headers.len() + 4); + // only encrypted traffic supported in HTTP3 + qheaders.push(Header::new(b":scheme".as_slice(), Scheme::HTTPS.to_string().as_bytes())); + + // use authority when present + let authority = if let Some(authority) = req.uri.authority() { + authority.clone() + } else { + // or use host header as authority + let host = req.headers.get(http::header::HOST); + let Some(host) = host else { + return Error::e_explain(InvalidHTTPHeader, "no authority header for h3"); + }; + // validate + Authority::try_from(host.as_bytes()) + .explain_err(InvalidHTTPHeader, |_| format!("invalid authority from host {:?}", host))? + }; + qheaders.push(Header::new(b":authority".as_slice(), authority.as_str().as_bytes())); + + let Some(path) = req.uri.path_and_query() else { + return Error::e_explain(InvalidHTTPHeader, "no path header for h3"); + }; + qheaders.push(Header::new(b":path".as_slice(), path.as_str().as_bytes())); + qheaders.push(Header::new(b":method".as_slice(), req.method.as_str().as_bytes())); + + // copy all other request headers + // the pseudo-headers starting with ":" need to be sent before regular headers + for (k, v) in &req.headers { + qheaders.push(Header::new(k.as_str().as_bytes(), v.as_bytes())) + } + Ok(qheaders) +} + +fn event_to_response_headers(resp: &Vec
) -> Result { + // pseudo-headers have to be first, response only has a single valid pseudo header ":status" + // which MUST be included as per RFC9114 Section 4.3.2 + let mut response = ResponseHeader::build(resp[0].value(), Some(resp.len() - 1))?; + response.set_version(Version::HTTP_3); + + for h in &resp[1..] { + let k = HeaderName::from_bytes(h.name()) + .explain_err(InvalidHTTPHeader, + |_| format!("failed to parse header name {:?}", h.name()))?; + let v = HeaderValue::from_bytes(h.value()) + .explain_err(InvalidHTTPHeader, + |_| format!("failed to parse header value {:?}", h.value()))?; + response.append_header(k, v)?; + } + + Ok(response) +} + fn headermap_to_headervec(headers: &HeaderMap) -> Vec
{ headers .iter() @@ -76,8 +141,110 @@ fn headermap_to_headervec(headers: &HeaderMap) -> Vec
{ .collect() } +fn headervec_to_headermap(headers: &Vec
) -> Result { + let mut map = HeaderMap::with_capacity(headers.len()); + for h in headers { + if h.name().len() > 0 && h.name()[0] == b":".as_slice()[0] { + let k = HeaderName::from_bytes(h.name()) + .explain_err(InvalidHTTPHeader, + |_| format!("failed to parse header name {:?}", h.name()))?; + let v = HeaderValue::from_bytes(h.value()) + .explain_err(InvalidHTTPHeader, + |_| format!("failed to parse header value {:?}", h.value()))?; + map.insert(k, v); + } + } + Ok(map) +} + fn header_size(headers: &[T]) -> usize { headers .iter() .fold(0, |acc, h| acc + h.value().len() + h.name().len() + 32) } + +fn stream_capacity<'a>( + conn: &'a Mutex, + stream_id: u64, + required: usize, + rx_notify: &'a Notify, + tx_notify: &'a Notify +) -> Pin> + Send + 'a>> { + Box::pin(async move { + let capacity; + { + let qconn = conn.lock(); + let conn_id = qconn.trace_id(); + capacity = qconn.stream_capacity(stream_id) + .explain_err(ErrorType::WriteError, |e| { + format!( + "H3 connection {} failed to acquire capacity for stream {} error {:?}", + conn_id, stream_id, e + ) + })?; + } + + // FIXME: handle capacity <= required e.g. required is gt configured send buffers + if capacity >= required { + Ok(capacity) + } else { + tx_notify.notify_waiters(); + rx_notify.notified().await; + stream_capacity(conn, stream_id, required, rx_notify, tx_notify).await + } + }) +} + +async fn data_finished_event(stream_id: u64, event_rx: &mut Receiver) -> Result<()> { + loop { + match event_rx.recv().await { + Some(ev) => { + match ev { + Event::Finished => { + trace!("stream {} event {:?}", stream_id, ev); + return Ok(()); + } + Event::Headers { .. 
} => { + debug_assert!(false, "Headers or Finished event when Data requested"); + } + Event::Data => { + trace!("stream {} event {:?}", stream_id, ev); + return Ok(()); + } + Event::Reset(error_code) => { + return Err(Error::explain( + H3Error, + format!("stream was reset with error code {}", error_code), + )) + } + Event::PriorityUpdate => { + // TODO: this step should be deferred until + // h3::Connection::poll() returns Error::Done + // see also h3::Connection::send_response_with_priority() + + /* + // https://datatracker.ietf.org/doc/rfc9218/ + let mut hconn = self.h3_connection.lock(); + // field value has the same content as the header::Priority field + let field_value = hconn.take_last_priority_update(self.stream_id) + .explain_err(H3Error, "failed to receive priority update field value")?; + */ + warn!("received unhandled priority update"); + continue; + } + Event::GoAway => { + // RFC 9114 Section 5.2 & 7.2.6 + warn!("received unhandled go-away"); + continue; + } + } + } + None => { + return Err(Error::explain( + ReadError, + "H3 session event channel disconnected", + )) + } + } + } +} \ No newline at end of file diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index 56c9b42c9..d6916e552 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -22,21 +22,18 @@ use http::uri::PathAndQuery; use http::{header, HeaderMap, HeaderName}; use log::{debug, error, info, trace, warn}; use parking_lot::Mutex; -use pingora_error::{Error, ErrorType, OrErr, Result}; +use pingora_error::{Error, OrErr, Result}; +use pingora_error::ErrorType::{ConnectError, H3Error, InternalError, ReadError, WriteError}; use pingora_http::{RequestHeader, ResponseHeader}; use std::cmp; use std::collections::VecDeque; use std::fmt::Debug; -use std::future::Future; -use std::pin::Pin; use std::sync::Arc; use std::time::Duration; use crate::protocols::http::body_buffer::FixedBuffer; 
use crate::protocols::http::v3::nohash::StreamIdHashMap; -use crate::protocols::http::v3::{ - event_to_request_headers, header_size, headermap_to_headervec, response_headers_to_event, -}; +use crate::protocols::http::v3::{data_finished_event, event_to_request_headers, header_size, headermap_to_headervec, response_headers_to_event, stream_capacity, H3_SESSION_DROP_DEQUE_INITIAL_CAPACITY, H3_SESSION_EVENTS_CHANNEL_SIZE}; use crate::protocols::http::HttpTask; use crate::protocols::l4::quic::{Connection, MAX_IPV6_QUIC_DATAGRAM_SIZE}; pub use quiche::h3::Config as H3Options; @@ -45,8 +42,6 @@ use quiche::{h3, Connection as QuicheConnection, ConnectionId, Shutdown}; use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::{mpsc, Notify}; -const H3_SESSION_EVENTS_CHANNEL_SIZE: usize = 256; -const H3_SESSION_DROP_DEQUE_INITIAL_CAPACITY: usize = 2048; const BODY_BUF_LIMIT: usize = 1024 * 64; const SHUTDOWN_GOAWAY_DRAIN_TIMEOUT: Duration = Duration::from_secs(60); @@ -57,7 +52,7 @@ const SHUTDOWN_GOAWAY_DRAIN_TIMEOUT: Duration = Duration::from_secs(60); pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result { let Some(conn) = io.quic_connection_state() else { return Err(Error::explain( - ErrorType::ConnectError, + ConnectError, "H3 handshake only possible on Quic connections", )); }; @@ -73,7 +68,7 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

) -> Result

{ return Err(Error::explain( - ErrorType::InternalError, + InternalError, "connection needs to be established, invalid state", )) } @@ -154,7 +149,7 @@ impl H3Connection { debug!("H3 connection {:?} sending GoAway", self.connection_id); hconn .send_goaway(&mut qconn, self.max_accepted_stream_id) - .explain_err(ErrorType::H3Error, |_| "failed to send graceful shutdown")?; + .explain_err(H3Error, |_| "failed to send graceful shutdown")?; self.tx_notify.notify_waiters(); } @@ -176,13 +171,13 @@ impl H3Connection { let mut qconn = self.quic_connection.lock(); qconn .close(false, 0x00, b"graceful shutdown") - .explain_err(ErrorType::H3Error, |_| "failed to close quic connection")?; + .explain_err(H3Error, |_| "failed to close quic connection")?; self.tx_notify.notify_waiters(); } if is_timeout { Err(Error::explain( - ErrorType::InternalError, + InternalError, "h3 session draining timed out with active sessions", )) } else { @@ -311,7 +306,7 @@ impl H3Session { channel .send(ev) .await - .explain_err(ErrorType::WriteError, |e| { + .explain_err(WriteError, |e| { format!("failed to send on event channel with {}", e) })?; } else { @@ -334,7 +329,7 @@ impl H3Session { let mut hconn = conn.h3_connection.lock(); hconn .send_goaway(&mut qconn, conn.max_accepted_stream_id) - .explain_err(ErrorType::InternalError, |_| { + .explain_err(InternalError, |_| { "failed to send goaway" })?; conn.tx_notify.notify_waiters(); @@ -471,7 +466,7 @@ impl H3Session { conn.tx_notify.notify_waiters(); error!("H3 connection closed with error {:?}.", e); - return Err(e).explain_err(ErrorType::H3Error, |_| { + return Err(e).explain_err(H3Error, |_| { "while accepting new downstream requests" }); } @@ -491,11 +486,14 @@ impl H3Session { /// Read request body bytes. `None` when there is no more body to read. pub async fn read_body_bytes(&mut self) -> Result> { - self.data_finished_event().await?; if self.read_ended { return Ok(None); } + // FIXME: this is wrong, required to wait for data event first? 
+ data_finished_event(self.stream_id, &mut self.event_rx).await?; + self.read_ended = true; + let mut buf = [0u8; MAX_IPV6_QUIC_DATAGRAM_SIZE]; let size = match self.recv_body(&mut buf) { Ok(size) => size, @@ -505,7 +503,7 @@ impl H3Session { } Err(e) => { return Err(Error::explain( - ErrorType::ReadError, + ReadError, format!("reading body failed with {}", e), )) } @@ -584,14 +582,8 @@ impl H3Session { } async fn send_response(&self, headers: &[T], fin: bool) -> Result<()> { - self.stream_capacity(header_size(headers)) - .await - .explain_err(ErrorType::WriteError, |_| { - format!( - "H3 connection {:?} failed to acquire capacity for stream {}", - self.connection_id, self.stream_id - ) - })?; + stream_capacity(&self.quic_connection, self.stream_id, header_size(headers), + &self.rx_notify, &self.tx_notify).await?; let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); @@ -604,7 +596,7 @@ impl H3Session { match hconn.send_response(&mut qconn, self.stream_id, headers, fin) { Ok(()) => Ok(()), Err(h3::Error::Done) => Ok(()), - Err(e) => Err(e).explain_err(ErrorType::WriteError, |_| { + Err(e) => Err(e).explain_err(WriteError, |_| { "H3 connection failed to write response" }), } @@ -613,13 +605,12 @@ impl H3Session { /// Write response body to the client. See [Self::write_response_header] for how to use `end`. pub async fn write_body(&mut self, data: Bytes, end: bool) -> Result<()> { if self.send_ended { - // NOTE: in h1, we also track to see if content-length matches the data - // We have not tracked that in h3 + // NOTE: within http3 content-length tracking is not available warn!("Cannot write body after stream ended. 
Dropping the extra data."); return Ok(()); } else if self.response_header_written.is_none() { return Err(Error::explain( - ErrorType::H3Error, + H3Error, "trying to send the body before header being sent", )); }; @@ -628,15 +619,8 @@ impl H3Session { let mut fin = end; while sent_len < data.len() { let required = cmp::min(data.len() - sent_len, MAX_IPV6_QUIC_DATAGRAM_SIZE); - let capacity = - self.stream_capacity(required) - .await - .explain_err(ErrorType::WriteError, |e| { - format!( - "Failed to acquire capacity on stream id {} with {}", - self.stream_id, e - ) - })?; + let capacity = stream_capacity(&self.quic_connection, self.stream_id, required, + &self.rx_notify, &self.tx_notify).await?; let send = if capacity > data.len() - sent_len { &data[sent_len..data.len()] @@ -651,7 +635,7 @@ impl H3Session { sent_len += sent_size; } Err(e) => { - return Err(e).explain_err(ErrorType::WriteError, |_| { + return Err(e).explain_err(WriteError, |_| { "writing h3 response body to downstream" }) } @@ -683,37 +667,13 @@ impl H3Session { hconn.send_body(&mut qconn, self.stream_id, body, fin) } - fn stream_capacity( - &self, - required: usize, - ) -> Pin> + Send + '_>> { - Box::pin(async move { - let capacity; - { - let qconn = self.quic_connection.lock(); - capacity = qconn.stream_capacity(self.stream_id)?; - } - - if capacity >= required { - Ok(capacity) - } else { - self.tx_notify.notify_waiters(); - self.rx_notify.notified().await; - self.stream_capacity(required).await - } - }) - } - /// Write response trailers to the client, this also closes the stream. 
pub async fn write_trailers(&mut self, trailers: HeaderMap) -> Result<()> { if self.send_ended { warn!("Tried to write trailers after end of stream, dropping them"); return Ok(()); } else if self.body_sent == 0 { - return Err(Error::explain( - ErrorType::H3Error, - "Trying to send trailers before body is sent.", - )); + return Err(Error::explain(H3Error,"Trying to send trailers before body is sent.")); }; let headers = headermap_to_headervec(&trailers); @@ -733,14 +693,8 @@ impl H3Session { is_trailer: bool, fin: bool, ) -> Result<()> { - self.stream_capacity(header_size(headers)) - .await - .explain_err(ErrorType::WriteError, |_| { - format!( - "H3 connection {:?} failed to acquire capacity for stream {}", - self.connection_id, self.stream_id - ) - })?; + stream_capacity(&self.quic_connection, self.stream_id, header_size(headers), + &self.rx_notify, &self.tx_notify).await?; let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); @@ -759,7 +713,7 @@ impl H3Session { self.tx_notify.notify_waiters(); Ok(()) } - Err(e) => Err(e).explain_err(ErrorType::WriteError, |_| { + Err(e) => Err(e).explain_err(WriteError, |_| { "H3 connection failed to write h3 trailers to downstream" }), } @@ -789,7 +743,7 @@ impl H3Session { if self.response_header_written.is_some() { // use an empty data frame to signal the end self.send_body(&[], true).explain_err( - ErrorType::WriteError, + WriteError, |e| format! {"Writing h3 response body to downstream failed. {e}"}, )?; self.tx_notify.notify_waiters(); @@ -800,60 +754,7 @@ impl H3Session { Ok(()) } - async fn data_finished_event(&mut self) -> Result<()> { - loop { - match self.event_rx.recv().await { - Some(ev) => { - match ev { - Event::Finished => { - trace!("stream {} event {:?}", self.stream_id, ev); - self.read_ended = true; - return Ok(()); - } - Event::Headers { .. 
} => { - debug_assert!(false, "Headers or Finished event when Data requested"); - } - Event::Data => { - trace!("stream {} event {:?}", self.stream_id, ev); - return Ok(()); - } - Event::Reset(error_code) => { - return Err(Error::explain( - ErrorType::H3Error, - format!("stream was reset with error code {}", error_code), - )) - } - Event::PriorityUpdate => { - // TODO: this step should be deferred until - // h3::Connection::poll() returns Error::Done - // see also h3::Connection::send_response_with_priority() - - /* - // https://datatracker.ietf.org/doc/rfc9218/ - let mut hconn = self.h3_connection.lock(); - // field value has the same content as the header::Priority field - let field_value = hconn.take_last_priority_update(self.stream_id) - .explain_err(ErrorType::H3Error, "failed to receive priority update field value")?; - */ - warn!("received unhandled priority update"); - continue; - } - Event::GoAway => { - // RFC 9114 Section 5.2 & 7.2.6 - warn!("received unhandled go-away"); - continue; - } - } - } - None => { - return Err(Error::explain( - ErrorType::ReadError, - "H3 session event channel disconnected", - )) - } - } - } - } + async fn reset_event(&mut self) -> Result { loop { @@ -870,7 +771,7 @@ impl H3Session { } None => { return Err(Error::explain( - ErrorType::ReadError, + ReadError, "H3 session event channel disconnected", )) } @@ -1024,7 +925,7 @@ impl H3Session { if no_body_expected || self.is_body_done() { let reason = self.reset_event().await?; Error::e_explain( - ErrorType::H3Error, + H3Error, format!("Client closed H3, reason: {reason}"), ) } else { diff --git a/pingora-core/src/protocols/l4/quic/connector.rs b/pingora-core/src/protocols/l4/quic/connector.rs index f5bec890e..3c51996f4 100644 --- a/pingora-core/src/protocols/l4/quic/connector.rs +++ b/pingora-core/src/protocols/l4/quic/connector.rs @@ -49,9 +49,9 @@ impl Connection { format!("failed to get peer address from socket: {}", e) })?; - let configs = 
configs.unwrap_or(QuicHttp3Configs::try_from( - QuicHttp3Configs::new_quic_connector(None)?, - )?); + let configs = configs.unwrap_or( + QuicHttp3Configs::from_ca_file_path(None)? + ); let (gso_enabled, pacing_enabled) = detect_gso_pacing(&io); Ok(Self::OutgoingHandshake(OutgoingHandshakeState { diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index 199a2b794..a12c54244 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -292,6 +292,22 @@ impl QuicHttp3Configs { quic.grease(false); // default true + quic.set_max_idle_timeout(600 * 1000); // default ulimited + quic.set_max_recv_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // recv default is 65527 + quic.set_max_send_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // send default is 1200 + quic.set_initial_max_data(10_000_000); // 10 Mb + quic.set_initial_max_stream_data_bidi_local(1_000_000); // 1 Mb + quic.set_initial_max_stream_data_bidi_remote(1_000_000); // 1 Mb + quic.set_initial_max_stream_data_uni(1_000_000); // 1 Mb + quic.set_initial_max_streams_bidi(100); + quic.set_initial_max_streams_uni(100); + + quic.set_disable_active_migration(true); // default is false + + // quic.set_active_connection_id_limit(2); // default 2 + // quic.set_max_connection_window(conn_args.max_window); // default 24 Mb + // quic.set_max_stream_window(conn_args.max_stream_window); // default 16 Mb + Ok(quic) } pub fn new_quic_listener(cert_chain_pem_file: &str, priv_key_pem_file: &str) -> Result { @@ -369,6 +385,13 @@ impl QuicHttp3Configs { }) } + pub fn from_ca_file_path(trust_origin_ca_pem: Option<&str>) -> Result { + Ok(Self { + quic: Arc::new(Mutex::new(Self::new_quic_connector(trust_origin_ca_pem)?)), + http3: Arc::new(Self::new_http3()?), + }) + } + pub fn from_cert_key_paths(cert_chain_pem_file: &str, priv_key_pem_file: &str) -> Result { Ok(Self { quic: Arc::new(Mutex::new(Self::new_quic_listener( @@ -567,6 +590,10 @@ impl 
Ssl for Connection { let conn = s.connection.lock(); ALPN::from_wire_selected(conn.application_proto()) } + Connection::OutgoingEstablished(s) => { + let conn = s.connection.lock(); + ALPN::from_wire_selected(conn.application_proto()) + } _ => None, } } diff --git a/pingora-core/src/protocols/l4/stream.rs b/pingora-core/src/protocols/l4/stream.rs index aaa475e7d..58bc17cdb 100644 --- a/pingora-core/src/protocols/l4/stream.rs +++ b/pingora-core/src/protocols/l4/stream.rs @@ -538,7 +538,15 @@ impl ConnectionState for Stream { } } -impl Ssl for Stream {} +impl Ssl for Stream { + fn selected_alpn_proto(&self) -> Option { + match &self.stream.get_ref().stream { + RawStream::Quic(s) => s.selected_alpn_proto(), + RawStream::Tcp(_) => None, + RawStream::Unix(_) => None, + } + } +} #[async_trait] impl Peek for Stream { @@ -829,6 +837,7 @@ pub mod async_write_vec { } pub use async_write_vec::AsyncWriteVec; +use crate::listeners::ALPN; #[derive(Debug)] struct AccumulatedDuration { diff --git a/pingora-core/src/protocols/tls/quic/client.rs b/pingora-core/src/protocols/tls/quic/client.rs index 6596457d1..770638c2f 100644 --- a/pingora-core/src/protocols/tls/quic/client.rs +++ b/pingora-core/src/protocols/tls/quic/client.rs @@ -123,20 +123,10 @@ where tx_notify: tx_notify.clone(), }; - let e_state = OutgoingEstablishedState { - connection_id: conn_id.clone(), - connection: connection.clone(), - http3_config: configs.http3().clone(), - - socket: socket_details.io.clone(), - rx_notify: rx_notify.clone(), - tx_notify: tx_notify.clone(), - - rx_handle: tokio::task::spawn(rx.start()), - tx_handle: tokio::task::spawn(tx.start()), - }; - + let rx_handle = tokio::task::spawn(rx.start()); // starting the ConnectionTx task sent the initial handshake packet + let tx_handle = tokio::task::spawn(tx.start()); + loop { // wait for the response rx_notify.notified().await; @@ -156,7 +146,7 @@ where handle_connection_errors(conn_id.clone(), conn.peer_error(), conn.local_error())?; if 
conn.is_established() { // send HANDSHAKE_DONE Quic frame on established connection - e_state.tx_notify.notify_waiters(); + tx_notify.notify_waiters(); break; } } @@ -164,5 +154,20 @@ where tx_notify.notify_waiters(); } + + let e_state = OutgoingEstablishedState { + connection_id: conn_id.clone(), + connection: connection.clone(), + + http3_config: configs.http3().clone(), + + socket: socket_details.io.clone(), + rx_notify: rx_notify.clone(), + tx_notify: tx_notify.clone(), + + rx_handle, + tx_handle, + }; + Ok(e_state) } diff --git a/pingora-core/src/upstreams/peer.rs b/pingora-core/src/upstreams/peer.rs index ced9ecdaf..56899067a 100644 --- a/pingora-core/src/upstreams/peer.rs +++ b/pingora-core/src/upstreams/peer.rs @@ -36,6 +36,7 @@ use crate::protocols::l4::socket::SocketAddr; use crate::protocols::tls::CaType; #[cfg(unix)] use crate::protocols::ConnFdReusable; +use crate::protocols::l4::quic::QuicHttp3Configs; use crate::protocols::TcpKeepalive; use crate::utils::tls::{get_organization_unit, CertKey}; @@ -332,6 +333,10 @@ pub struct PeerOptions { pub h2_ping_interval: Option, // how many concurrent h2 stream are allowed in the same connection pub max_h2_streams: usize, + // how many concurrent h3 stream are allowed in the same connection + pub max_h3_streams: usize, + // quic and http3 configs (quiche) + pub quic_http3_config: Option, pub extra_proxy_headers: BTreeMap>, // The list of curve the tls connection should advertise // if `None`, the default curves will be used @@ -366,6 +371,8 @@ impl PeerOptions { dscp: None, h2_ping_interval: None, max_h2_streams: 1, + max_h3_streams: 1, + quic_http3_config: None, extra_proxy_headers: BTreeMap::new(), curves: None, second_keyshare: true, // default true and noop when not using PQ curves From 49aec002df034aacd274e8dc2fe4dac7d4372f5d Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Thu, 23 Jan 2025 17:56:52 +0100 Subject: [PATCH 31/52] setup close watch channel for idle connections --- 
pingora-core/src/connectors/http/v3.rs | 183 ++++++++++++++----- pingora-core/src/protocols/http/v3/client.rs | 171 +++++++++-------- pingora-core/src/protocols/http/v3/mod.rs | 3 +- 3 files changed, 232 insertions(+), 125 deletions(-) diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index 50cef89bc..c47cf0403 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -3,71 +3,113 @@ use super::HttpSession; use crate::connectors::http::InUsePool; use crate::connectors::{ConnectorOptions, TransportConnector}; use crate::protocols::http::v3::client::{Http3Poll, Http3Session}; -use crate::protocols::http::v3::nohash::StreamIdHashMap; -use crate::protocols::http::v3::H3_SESSION_EVENTS_CHANNEL_SIZE; use crate::protocols::l4::quic::{Connection, Crypto}; use crate::protocols::{Digest, Stream, UniqueID, UniqueIDType}; use crate::upstreams::peer::{Peer, ALPN}; -use log::{debug, error}; +use log::debug; use parking_lot::Mutex; -use pingora_error::ErrorType::{H2Error, HandshakeError, InternalError}; +use pingora_error::ErrorType::{H3Error, HandshakeError, InternalError}; use pingora_error::{Error, ErrorType, OrErr, Result}; -use pingora_pool::ConnectionPool; +use pingora_pool::{ConnectionMeta, ConnectionPool}; use quiche::h3::Event; use quiche::ConnectionId; use std::collections::VecDeque; -use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; +use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use std::time::Duration; -use tokio::sync::mpsc::Sender; -use tokio::sync::Notify; -use tokio::sync::mpsc; +use tokio::sync::{watch, mpsc, Notify}; use tokio::task::JoinHandle; -use pingora_runtime::current_handle; // FIXME: ConnectorOptions contains CA file path from ServerConfig +#[derive(Clone)] pub(crate) struct ConnectionRef(Arc); impl ConnectionRef { - pub fn new(l4_stream: Stream, digest: Digest, conn_io: ConnectionIo, - add_sessions: Arc)>>>, + pub fn new(l4_stream: 
Stream, conn_io: ConnectionIo, digest: Digest, + add_sessions: Arc)>>>, drop_sessions: Arc>>, + idle_close: watch::Receiver, max_streams: usize, h3poll_task: JoinHandle>, ) -> Self { Self(Arc::new(ConnectionRefInner { l4_stream, - digest, conn_io, + + digest, max_streams, current_streams: AtomicUsize::new(0), - max_initiated_stream_id: AtomicU64::new(0), + release_lock: Arc::new(Default::default()), + add_sessions, drop_sessions, - h3poll_task + idle_close, + h3poll_task, })) } } +impl ConnectionRef { + pub(crate) fn conn_id(&self) -> &ConnectionId<'_> { + &self.0.conn_io.conn_id + } + + pub(crate) fn conn_io(&self) -> &ConnectionIo { + &self.0.conn_io + } + + pub(crate) fn digest(&self) -> &Digest { + &self.0.digest + } + + pub(crate) fn add_session(&self, stream_id: u64, tx: mpsc::Sender) { + let mut add_sessions = self.0.add_sessions.lock(); + add_sessions.push_back((stream_id, tx)) + } + + pub(crate) fn drop_session(&self, stream_id: u64) { + self.0.current_streams.fetch_sub(1, Ordering::SeqCst); + let mut drop_sessions = self.0.drop_sessions.lock(); + drop_sessions.push_back(stream_id); + } + + fn is_closed(&self) -> bool { + *self.0.idle_close.borrow() + } + + fn is_shutting_down(&self) -> bool { + self.conn_io().is_shutting_down() + } + + fn is_idle(&self) -> bool { + self.0.current_streams.load(Ordering::SeqCst) == 0 + } +} + pub(crate) struct ConnectionRefInner { // avoid dropping stream, & used for UniqueIDType l4_stream: Stream, - digest: Digest, + // resources required for Http3, Quic & network IO conn_io: ConnectionIo, + // connection digest + digest: Digest, + // max concurrent streams this connection is allowed to create max_streams: usize, // how many concurrent streams already active current_streams: AtomicUsize, - // last initiated stream_id - max_initiated_stream_id: AtomicU64, + // lock is used during moving the connection across pools + release_lock: Arc>, - // to remove sessions from the H3Poll tasks - add_sessions: Arc)>>>, - // to remove 
sessions from the H3Poll tasks + // add session to active sessions in Http3Poll task + add_sessions: Arc)>>>, + // remove session from active sessions in Http3Poll task drop_sessions: Arc>>, + // watch for idle pool timeouts + idle_close: watch::Receiver, h3poll_task: JoinHandle> } @@ -99,17 +141,21 @@ pub(crate) struct ConnectionIo { } +impl ConnectionIo { + fn is_shutting_down(&self) -> bool { + let qconn = self.quic.lock(); + qconn.is_draining() + } +} + /// Http3 connector pub struct Connector { - // just for creating connections, the Stream of h2 should be reused + // for creating connections, the Stream for h3 should be reused transport: TransportConnector, - // the h2 connection idle pool - //idle_pool: Arc>, - // the pool of h2 connections that have ongoing streams - //in_use_pool: crate::connectors::http::v2::InUsePool, - in_use_pool: InUsePool, // the h3 connection idle pool idle_pool: Arc>, + // the pool of h3 connections that have ongoing streams + in_use_pool: InUsePool, crypto: Option, } @@ -157,6 +203,9 @@ impl Connector { .get(reuse_hash) .or_else(|| self.idle_pool.get(&reuse_hash)); if let Some(conn) = maybe_conn { + // lock the connection before adding a stream + // ensures that moving between pools and e.g. idle() checks is guarded + let _release_lock = conn.0.release_lock.lock_arc(); let h3_stream = conn.spawn_stream().await?; if conn.more_streams_allowed() { self.in_use_pool.insert(reuse_hash, conn); @@ -179,7 +228,46 @@ impl Connector { peer: &P, idle_timeout: Option, ) { - todo!() + let id = session.conn().id(); + let reuse_hash = peer.reuse_hash(); + // get a ref to the connection, which we might need below, before dropping the h3 + let conn = session.conn(); + + // The lock here is to make sure that in_use_pool.insert() below cannot be called after + // in_use_pool.release(), which would have put the conn entry in both pools. 
+ // It also makes sure that only one conn will trigger the conn.is_idle() condition, which + // avoids putting the same conn into the idle_pool more than once. + let locked = conn.0.release_lock.lock_arc(); + // TODO: should a stream_reset be called during drop? + // this drop() will both drop the actual stream and call the conn.release_stream() + drop(session); + // find and remove the conn stored in in_use_pool so that it could be put in the idle pool + // if necessary + let conn = self.in_use_pool.release(reuse_hash, id).unwrap_or(conn); + if conn.is_closed() || conn.is_shutting_down() { + // should never be put back to the pool + return; + } + if conn.is_idle() { + let meta = ConnectionMeta { + key: reuse_hash, + id, + }; + let idle_closed = conn.0.idle_close.clone(); + let (notify_evicted, watch_use) = self.idle_pool.put(&meta, conn); + drop(locked); + if let Some(to) = idle_timeout { + let pool = self.idle_pool.clone(); // clone the arc + let rt = pingora_runtime::current_handle(); + rt.spawn(async move { + pool.idle_timeout(&meta, to, notify_evicted, idle_closed, watch_use) + .await; + }); + } + } else { + self.in_use_pool.insert(reuse_hash, conn); + drop(locked); + } } /// Create a new Http3 connection to the given server @@ -187,19 +275,7 @@ impl Connector { &self, peer: &P, ) -> Result { - let mut stream = self.transport.new_stream(peer).await?; - error!("{:?}", stream.is_quic_connection()); - if let Some(qconn) = stream.quic_connection_state() { - match qconn { - Connection::IncomingHandshake(_) => {} - Connection::IncomingEstablished(_) => {} - Connection::OutgoingHandshake(_) => {} - Connection::OutgoingEstablished(e) => { - error!("established {:?}", qconn); - } - } - } - error!("{:?}", stream.selected_alpn_proto()); + let stream = self.transport.new_stream(peer).await?; // TODO: verify & check how this can fit into TCP/UDP picture // check alpn match stream.selected_alpn_proto() { @@ -238,11 +314,7 @@ impl ConnectionRef { return Ok(None); } - let 
h3_session = Http3Session::new( - self.0.conn_io.clone(), - self.0.add_sessions.clone(), - self.0.drop_sessions.clone())?; - + let h3_session = Http3Session::new(self.clone())?; Ok(Some(h3_session)) } @@ -261,10 +333,9 @@ async fn handshake( ) -> Result { // Safe guard: new_http_session() assumes there should be at least one free stream if max_streams == 0 { - return Error::e_explain(H2Error, "zero max_stream configured"); + return Error::e_explain(H3Error, "zero max_stream configured"); } - let unique_id = stream.id(); let digest = Digest { // NOTE: this field is always false because the digest is shared across all streams // The streams should log their own reuse info @@ -305,21 +376,24 @@ async fn handshake( let add_sessions = Arc::new(Mutex::new(VecDeque::default())); let drop_sessions = Arc::new(Mutex::new(VecDeque::default())); + let (idle_close_tx, idle_close_rx) = watch::channel::(false); let h3poll = Http3Poll { conn_io: conn_io.clone(), sessions: Default::default(), add_sessions: add_sessions.clone(), drop_sessions: drop_sessions.clone(), + idle_close: idle_close_tx }; - let h3poll_task = current_handle().spawn(h3poll.start()); + let h3poll_task = pingora_runtime::current_handle().spawn(h3poll.start()); Ok(ConnectionRef::new( stream, - digest, conn_io, + digest, add_sessions, drop_sessions, + idle_close_rx, max_streams, h3poll_task )) @@ -327,7 +401,9 @@ async fn handshake( #[cfg(test)] mod quic_tests { + use bytes::Bytes; use http::Version; + use zstd::zstd_safe::WriteBuf; use crate::connectors::quic_tests::quic_listener_peer; use pingora_error::Result; use pingora_http::RequestHeader; @@ -345,8 +421,10 @@ mod quic_tests { req.insert_header(http::header::HOST, "openresty.org")?; session.write_request_header(Box::new(req)).await?; + session.write_request_body(Bytes::from(b"hello world".as_slice()), false).await?; session.finish_request_body().await?; session.read_response_header().await?; + let resp_body = session.read_response_body().await?; let resp = 
session.response_header(); @@ -356,6 +434,11 @@ mod quic_tests { assert_eq!(resp.version, Version::HTTP_3); } + assert!(resp_body.is_some()); + if let Some(resp_body) = resp_body { + assert_eq!(resp_body.as_slice(), b"hello world".as_slice()) + } + Ok(()) } } \ No newline at end of file diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs index d9a123363..fd0f3b5c9 100644 --- a/pingora-core/src/protocols/http/v3/client.rs +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -4,27 +4,32 @@ use std::fmt::Debug; use std::sync::Arc; use crate::connectors::http::v3::{ConnectionIo, ConnectionRef}; use crate::protocols::l4::socket::SocketAddr; -use crate::protocols::{Digest, UniqueIDType}; +use crate::protocols::{Digest, UniqueID, UniqueIDType}; use bytes::{BufMut, Bytes, BytesMut}; -use h2::SendStream; use http::HeaderMap; use pingora_http::{RequestHeader, ResponseHeader}; use std::time::Duration; use log::{debug, error, trace, warn}; use parking_lot::Mutex; -use quiche::{h3, ConnectionId}; +use quiche::{h3, Shutdown}; use quiche::h3::{Event, Header, NameValue}; -use tokio::sync::mpsc; +use tokio::sync::{mpsc, watch}; use tokio::sync::mpsc::{Receiver, Sender}; use pingora_error::{Error, ErrorType, OrErr, Result}; use pingora_error::ErrorType::{H3Error, InternalError, InvalidHTTPHeader, ReadError, WriteError}; -use crate::protocols::http::v3::{data_finished_event, event_to_response_headers, header_size, headervec_to_headermap, request_headers_to_event, stream_capacity, H3_SESSION_EVENTS_CHANNEL_SIZE}; +use crate::protocols::http::v3::{data_finished_event, event_to_response_headers, headervec_to_headermap, request_headers_to_event, stream_capacity, H3_SESSION_EVENTS_CHANNEL_SIZE}; use crate::protocols::http::v3::nohash::StreamIdHashMap; use crate::protocols::l4::quic::MAX_IPV6_QUIC_DATAGRAM_SIZE; + pub struct Http3Session { - conn_io: ConnectionIo, + conn: ConnectionRef, + + // stream id is assigned after the request has 
been sent + // quiche internally creates the underlying quic stream during quiche::h3::send_request() stream_id: Option, + // HTTP3 event channel for this stream_id + event_rx: Option>, /// The read timeout, which will be applied to both reading the header and the body. /// The timeout is reset on every read. This is not a timeout on the overall duration of the @@ -32,9 +37,6 @@ pub struct Http3Session { // FIXME: race with timeout if present pub read_timeout: Option, - // HTTP3 event channel for this stream_id - event_rx: Option>, - // sent request request_header_written: Option>, // received response @@ -49,44 +51,40 @@ pub struct Http3Session { body_read: usize, // read is finished (Quic finished frame received) read_ended: bool, +} - // remove session from active sessions - drop_sessions: Arc>>, - // add session to active sessions - add_sessions: Arc)>>> +impl Http3Session { + fn conn_io(&self) -> &ConnectionIo { + self.conn.conn_io() + } } + impl Drop for Http3Session { fn drop(&mut self) { if let Some(stream_id) = self.stream_id { - { - let mut sessions = self.drop_sessions.lock(); - sessions.push_back(stream_id); - } + self.conn.drop_session(stream_id); debug!("connection {:?} dropping session with stream id {}", - self.conn_io.conn_id, stream_id) + self.conn.conn_id(), stream_id) } + // TODO: clarify if a RESET_STREAM should be sent } } impl Http3Session { - pub(crate) fn new(conn_io: ConnectionIo, - add_sessions: Arc)>>>, - drop_sessions: Arc>>) - -> Result { + pub(crate) fn new(conn: ConnectionRef) -> Result { Ok(Self { - conn_io, + conn, stream_id: None, - read_timeout: None, event_rx: None, + + read_timeout: None, request_header_written: None, response_header: None, body_sent: 0, send_ended: false, body_read: 0, read_ended: false, - add_sessions, - drop_sessions, }) } @@ -113,8 +111,8 @@ impl Http3Session { // sending the request creates the underlying quic stream & according stream id // it is not possible to check the stream capacity before sending 
the request let stream_id = { - let mut qconn = self.conn_io.quic.lock(); - let mut hconn = self.conn_io.http3.lock(); + let mut qconn = self.conn_io().quic.lock(); + let mut hconn = self.conn_io().http3.lock(); hconn.send_request(&mut qconn, headers, fin) .explain_err(WriteError, |_| "failed to send http3 request headers")? @@ -124,11 +122,7 @@ impl Http3Session { self.stream_id = Some(stream_id); self.event_rx = Some(rx); - { - let mut add_sessions = self.add_sessions.lock(); - add_sessions.push_back((stream_id, tx)) - } - + self.conn.add_session(stream_id, tx); Ok(stream_id) } @@ -154,8 +148,8 @@ impl Http3Session { let mut fin = end; while sent_len < data.len() { let required = cmp::min(data.len() - sent_len, MAX_IPV6_QUIC_DATAGRAM_SIZE); - let capacity = stream_capacity(&self.conn_io.quic, stream_id, required, - &self.conn_io.rx_notify, &self.conn_io.tx_notify).await?; + let capacity = stream_capacity(&self.conn_io().quic, stream_id, required, + &self.conn_io().rx_notify, &self.conn_io().tx_notify).await?; let send = if capacity > data.len() - sent_len { &data[sent_len..data.len()] @@ -179,7 +173,7 @@ impl Http3Session { debug_assert_eq!(fin, end); debug_assert_eq!(sent_len, data.len()); if end { - self.conn_io.tx_notify.notify_waiters(); + self.conn_io().tx_notify.notify_waiters(); } self.body_sent += sent_len; @@ -190,8 +184,8 @@ impl Http3Session { // TODO: potentially refactor/unify with server side fn send_body(&self, body: &[u8], fin: bool) -> Result { - let mut qconn = self.conn_io.quic.lock(); - let mut hconn = self.conn_io.http3.lock(); + let mut qconn = self.conn_io().quic.lock(); + let mut hconn = self.conn_io().http3.lock(); hconn.send_body(&mut qconn, self.stream_id()?, body, fin) .explain_err(WriteError, |e| format!("failed to send http3 request body {:?}", e)) @@ -211,7 +205,7 @@ impl Http3Session { WriteError, |e| format! {"Writing h3 request body finished to downstream failed. 
{e}"}, )?; - self.conn_io.tx_notify.notify_waiters(); + self.conn_io().tx_notify.notify_waiters(); self.send_ended = true; } // else: the response header is not sent, do nothing now. @@ -256,7 +250,6 @@ impl Http3Session { return Ok(None); } - let read_timeout = self.read_timeout.clone(); tokio::select! { res = data_finished_event(self.stream_id()?, self.event_rx()?) => { @@ -265,9 +258,9 @@ impl Http3Session { }, _timedout = async { if let Some(read_timeout) = read_timeout { - tokio::time::sleep(read_timeout) + tokio::time::sleep(read_timeout).await; } else { - tokio::time::sleep(Duration::MAX) + tokio::time::sleep(Duration::MAX).await; } } => { return Err(Error::explain(ErrorType::ReadTimedout, "reading body timed out")) @@ -302,8 +295,8 @@ impl Http3Session { // TODO: potentially refactor/unify with server side // TODO: check if result type can be changed (requires Error::Done not being used) fn recv_body(&self, stream_id: u64, out: &mut [u8]) -> h3::Result { - let mut qconn = self.conn_io.quic.lock(); - let mut hconn = self.conn_io.http3.lock(); + let mut qconn = self.conn_io().quic.lock(); + let mut hconn = self.conn_io().http3.lock(); debug!( "H3 connection {:?} stream {} receiving body", qconn.trace_id(), stream_id @@ -318,10 +311,10 @@ impl Http3Session { } /// Check whether stream finished with error. - /// Like `response_finished`, but also attempts to poll the h2 stream for errors that may have - /// caused the stream to terminate, and returns them as `H2Error`s. + /// Like `response_finished`, but also attempts to poll the h3 stream for errors that may have + /// caused the stream to terminate, and returns them as `H3Error`s. pub fn check_response_end_or_error(&mut self) -> Result { - todo!() + todo!("within h2 this is used in pingora-proxy") } /// Read the optional trailer headers @@ -379,58 +372,81 @@ impl Http3Session { self.response_header.as_ref() } - /// Give up the http session abruptly. 
+ // TODO: potentially refactor/unify with server side + /// Give up the stream abruptly. + /// + /// This will send a `STOP_SENDING` and a `RESET_STREAM` for the Quic stream to the client. pub fn shutdown(&mut self) { - todo!() + let stream_id = match self.stream_id() { + Ok(id) => id, + Err(_) => { + error!("failed to shutdown session, no stream id present"); + return + } + }; + + if !self.read_ended { + self.stream_shutdown(stream_id, Shutdown::Read, 2u64); + // sent STOP_SENDING frame & stream_recv() will no longer return data + self.read_ended = true; + } + if !self.send_ended { + self.stream_shutdown(stream_id, Shutdown::Write, 2u64); + // sent RESET_STREAM & stream_send() data will be ignored + self.send_ended = true; + } } - /// Drop everything in this h2 stream. Return the connection ref. - /// After this function the underlying h2 connection should already notify the closure of this - /// stream so that another stream can be created if needed. - pub(crate) fn conn(&self) -> ConnectionRef { - todo!() + // TODO: potentially refactor/unify with server side + fn stream_shutdown(&self, stream_id: u64, direction: Shutdown, error_code: u64) { + let mut qconn = self.conn_io().quic.lock(); + match qconn.stream_shutdown(stream_id, direction, error_code) { + Ok(()) => self.conn_io().tx_notify.notify_waiters(), + Err(e) => warn!("h3 stream {} shutdown failed. {:?}", stream_id, e), + } } - /// Whether ping timeout occurred. After a ping timeout, the h2 connection will be terminated. - /// Ongoing h2 streams will receive an stream/connection error. The streams should check this - /// flag to tell whether the error is triggered by the timeout. 
- pub(crate) fn ping_timedout(&self) -> bool { - todo!() + /// Return the [`ConnectionRef`] of the Http3Session + pub(crate) fn conn(&self) -> ConnectionRef { + self.conn.clone() } - /// Return the [Digest] of the connection + /// Return the [`Digest`] of the connection /// /// For reused connection, the timing in the digest will reflect its initial handshakes /// The caller should check if the connection is reused to avoid misuse the timing field. pub fn digest(&self) -> Option<&Digest> { - todo!() + Some(&self.conn.digest()) } - /// Return a mutable [Digest] reference for the connection + /// Return a mutable [`Digest`] reference for the connection /// - /// Will return `None` if multiple H2 streams are open. + /// Will return `None` if multiple H3 streams are open. pub fn digest_mut(&mut self) -> Option<&mut Digest> { - todo!() + todo!("needs an arc in order to get_mut successfully") } /// Return the server (peer) address recorded in the connection digest. pub fn server_addr(&self) -> Option<&SocketAddr> { - todo!() + self.conn + .digest() + .socket_digest + .as_ref() + .map(|d| d.peer_addr())? } /// Return the client (local) address recorded in the connection digest. pub fn client_addr(&self) -> Option<&SocketAddr> { - todo!() + self.conn + .digest() + .socket_digest + .as_ref() + .map(|d| d.local_addr())? 
} /// the FD of the underlying connection pub fn fd(&self) -> UniqueIDType { - todo!() - } - - /// take the body sender to another task to perform duplex read and write - pub fn take_request_body_writer(&mut self) -> Option> { - todo!() + self.conn.id() } } @@ -481,14 +497,21 @@ pub(crate) struct Http3Poll { pub(crate) sessions: StreamIdHashMap>, pub(crate) drop_sessions: Arc>>, pub(crate) add_sessions: Arc)>>>, + pub(crate) idle_close: watch::Sender, } impl Http3Poll { pub(crate) async fn start(mut self) -> Result<()> { -// let conn_id = self.conn_io.conn_id.clone(); + let conn_id = self.conn_io.conn_id.clone(); 'poll: loop { let res = { let mut qconn = self.conn_io.quic.lock(); + if qconn.is_closed() { + self.idle_close.send_replace(true); + break 'poll Err(Error::explain( + H3Error, format!("quic connection {:?} is closed stopping", conn_id))); + } + let mut hconn = self.conn_io.http3.lock(); hconn.poll(&mut qconn) }; @@ -498,6 +521,8 @@ impl Http3Poll { Err(e) => match e { h3::Error::Done => { self.sessions_housekeeping()?; + + // TODO: connection timeout racing self.conn_io.rx_notify.notified().await; continue 'poll } diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index adb8c3f42..98f7295c7 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -15,7 +15,7 @@ //! 
HTTP/3 implementation use http::{HeaderMap, HeaderName, HeaderValue, Request, Uri, Version}; -use log::{error, trace, warn}; +use log::{trace, warn}; use pingora_error::{Error, ErrorType, OrErr, Result}; use pingora_http::{RequestHeader, ResponseHeader}; use quiche::h3::{Event, Header, NameValue}; @@ -28,7 +28,6 @@ use quiche::Connection; use tokio::sync::mpsc::Receiver; use tokio::sync::Notify; use pingora_error::ErrorType::{H3Error, InvalidHTTPHeader, ReadError}; -use crate::protocols::http::HttpVersion; pub const H3_SESSION_EVENTS_CHANNEL_SIZE: usize = 256; pub const H3_SESSION_DROP_DEQUE_INITIAL_CAPACITY: usize = 2048; From 8cb7ec4cb8a7ab6bff973ec828ad58db91aaace1 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Thu, 23 Jan 2025 19:00:58 +0100 Subject: [PATCH 32/52] enhance UDP/Http3 selection --- pingora-core/src/connectors/http/v3.rs | 27 +- pingora-core/src/connectors/l4.rs | 270 +++++++++--------- pingora-core/src/connectors/mod.rs | 32 ++- pingora-core/src/protocols/http/v3/client.rs | 3 +- pingora-core/src/protocols/http/v3/server.rs | 1 - pingora-core/src/protocols/tls/quic/client.rs | 4 +- pingora-core/src/upstreams/peer.rs | 29 +- 7 files changed, 193 insertions(+), 173 deletions(-) diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index c47cf0403..40e3bcfed 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -9,7 +9,7 @@ use crate::upstreams::peer::{Peer, ALPN}; use log::debug; use parking_lot::Mutex; use pingora_error::ErrorType::{H3Error, HandshakeError, InternalError}; -use pingora_error::{Error, ErrorType, OrErr, Result}; +use pingora_error::{Error, OrErr, Result}; use pingora_pool::{ConnectionMeta, ConnectionPool}; use quiche::h3::Event; use quiche::ConnectionId; @@ -166,8 +166,8 @@ impl Connector { let pool_size = options .as_ref() .map_or(DEFAULT_POOL_SIZE, |o| o.keepalive_pool_size); - // connection offload is handled by the [TransportConnector] + // 
connection offload is handled by the [TransportConnector] Self { transport: TransportConnector::new(options), idle_pool: Arc::new(ConnectionPool::new(pool_size)), @@ -276,14 +276,11 @@ impl Connector { peer: &P, ) -> Result { let stream = self.transport.new_stream(peer).await?; - // TODO: verify & check how this can fit into TCP/UDP picture + // check alpn match stream.selected_alpn_proto() { Some(ALPN::H3) => { /* continue */ } - _ => { - // FIXME: correctly route ALPNs - return Err(Error::explain(ErrorType::InternalError, "alpn does not match h3")) - } + _ => return Err(Error::explain(InternalError, "peer ALPN is not H3")) } let max_h3_stream = peer.get_peer_options().map_or(1, |o| o.max_h3_streams); @@ -400,7 +397,7 @@ async fn handshake( } #[cfg(test)] -mod quic_tests { +mod tests { use bytes::Bytes; use http::Version; use zstd::zstd_safe::WriteBuf; @@ -408,6 +405,7 @@ mod quic_tests { use pingora_error::Result; use pingora_http::RequestHeader; use super::*; + use crate::upstreams::peer::HttpPeer; #[tokio::test] async fn test_connector_quic_http3() -> Result<()> { @@ -441,4 +439,17 @@ mod quic_tests { Ok(()) } + + #[tokio::test] + #[cfg(feature = "any_tls")] + async fn test_connect_h3() { + let connector = Connector::new(None); + let mut peer = HttpPeer::new(("1.1.1.1", 443), true, "one.one.one.one".into()); + peer.options.set_http_version(3, 3); + let h3 = connector.new_http_session(&peer).await.unwrap(); + match h3 { + HttpSession::H1(_) | HttpSession::H2(_) => panic!("expect h3"), + HttpSession::H3(_h3_session) => assert!(true), + } + } } \ No newline at end of file diff --git a/pingora-core/src/connectors/l4.rs b/pingora-core/src/connectors/l4.rs index 144acfc11..ca1ca2cb2 100644 --- a/pingora-core/src/connectors/l4.rs +++ b/pingora-core/src/connectors/l4.rs @@ -21,7 +21,6 @@ use std::net::SocketAddr as InetSocketAddr; use std::os::unix::io::AsRawFd; #[cfg(windows)] use std::os::windows::io::AsRawSocket; - #[cfg(unix)] use 
crate::protocols::l4::ext::connect_uds; use crate::protocols::l4::ext::{ @@ -32,7 +31,7 @@ use crate::protocols::l4::quic::Connection; use crate::protocols::l4::socket::SocketAddr; use crate::protocols::l4::stream::Stream; use crate::protocols::{GetSocketDigest, SocketDigest}; -use crate::upstreams::peer::{IpProto, Peer}; +use crate::upstreams::peer::Peer; /// The interface to establish a L4 connection #[async_trait] @@ -101,7 +100,6 @@ where .await .err_context(|| format!("Fail to establish CONNECT proxy: {}", peer)); } - let peer_ip_proto = peer.ip_proto(); let peer_addr = peer.address(); let mut stream: Stream = if let Some(custom_l4) = @@ -109,132 +107,12 @@ where { custom_l4.connect(peer_addr).await? } else { - // FIXME: consider directly using peers ALPN setting for proto selection - if matches!(peer_ip_proto, IpProto::UDP) { - match peer_addr { - SocketAddr::Inet(addr) => { - let connect_future = udp_connect(addr, bind_to.as_ref(), |socket| { - #[cfg(unix)] - let raw = socket.as_raw_fd(); - #[cfg(windows)] - let raw = socket.as_raw_socket(); - - if let Some(dscp) = peer.dscp() { - debug!("Setting dscp"); - set_dscp(raw, dscp)?; - } - Ok(()) - }); - let conn_res = match peer.connection_timeout() { - Some(t) => pingora_timeout::timeout(t, connect_future) - .await - .explain_err(ConnectTimedout, |_| { - format!("timeout {t:?} connecting to server {peer}") - })?, - None => connect_future.await, - }; - let socket = match conn_res { - Ok(socket) => { - debug!("connected to new server: {}", peer.address()); - Ok(socket.into()) - } - Err(e) => { - let c = format!("Fail to connect to {peer}"); - match e.etype() { - SocketError | BindError => Error::e_because(InternalError, c, e), - _ => Err(e.more_context(c)), - } - } - }?; - - let mut quic_http3_config = None; - if let Some(peer_options) = peer.get_peer_options() { - quic_http3_config = peer_options.quic_http3_config.clone() - }; - - // FIXME: supply configs & default configs - Connection::initiate(socket, 
quic_http3_config)?.into() - } - SocketAddr::Unix(_addr) => { - // TODO: tokio::net::UnixDatagram support could be an option - // send_to(), recv_from() are using a file path with UnixDatagram - // need to verify if Quic/quiche can handle paths as SocketAddr - todo!() - } - } + if peer.udp_http3() { + // create UDP sockets + inner_udp_connect(peer, &bind_to, peer_addr).await? } else { - match peer_addr { - SocketAddr::Inet(addr) => { - let connect_future = tcp_connect(addr, bind_to.as_ref(), |socket| { - #[cfg(unix)] - let raw = socket.as_raw_fd(); - #[cfg(windows)] - let raw = socket.as_raw_socket(); - - if peer.tcp_fast_open() { - set_tcp_fastopen_connect(raw)?; - } - if let Some(recv_buf) = peer.tcp_recv_buf() { - debug!("Setting recv buf size"); - set_recv_buf(raw, recv_buf)?; - } - if let Some(dscp) = peer.dscp() { - debug!("Setting dscp"); - set_dscp(raw, dscp)?; - } - Ok(()) - }); - let conn_res = match peer.connection_timeout() { - Some(t) => pingora_timeout::timeout(t, connect_future) - .await - .explain_err(ConnectTimedout, |_| { - format!("timeout {t:?} connecting to server {peer}") - })?, - None => connect_future.await, - }; - match conn_res { - Ok(socket) => { - debug!("connected to new server: {}", peer.address()); - Ok(socket.into()) - } - Err(e) => { - let c = format!("Fail to connect to {peer}"); - match e.etype() { - SocketError | BindError => Error::e_because(InternalError, c, e), - _ => Err(e.more_context(c)), - } - } - } - } - #[cfg(unix)] - SocketAddr::Unix(addr) => { - let connect_future = connect_uds( - addr.as_pathname() - .expect("non-pathname unix sockets not supported as peer"), - ); - let conn_res = match peer.connection_timeout() { - Some(t) => pingora_timeout::timeout(t, connect_future) - .await - .explain_err(ConnectTimedout, |_| { - format!("timeout {t:?} connecting to server {peer}") - })?, - None => connect_future.await, - }; - match conn_res { - Ok(socket) => { - debug!("connected to new server: {}", peer.address()); - 
Ok(socket.into()) - } - Err(e) => { - let c = format!("Fail to connect to {peer}"); - match e.etype() { - SocketError | BindError => Error::e_because(InternalError, c, e), - _ => Err(e.more_context(c)), - } - } - } - } - }? + // create TCP sockets + inner_tcp_connect(peer, bind_to, peer_addr).await? } }; @@ -263,6 +141,142 @@ where Ok(stream) } +async fn inner_tcp_connect

(peer: &P, bind_to: Option, peer_addr: &SocketAddr) -> Result +where + P: Peer + Send + Sync +{ + match peer_addr { + SocketAddr::Inet(addr) => { + let connect_future = tcp_connect(addr, bind_to.as_ref(), |socket| { + #[cfg(unix)] + let raw = socket.as_raw_fd(); + #[cfg(windows)] + let raw = socket.as_raw_socket(); + + if peer.tcp_fast_open() { + set_tcp_fastopen_connect(raw)?; + } + if let Some(recv_buf) = peer.tcp_recv_buf() { + debug!("Setting recv buf size"); + set_recv_buf(raw, recv_buf)?; + } + if let Some(dscp) = peer.dscp() { + debug!("Setting dscp"); + set_dscp(raw, dscp)?; + } + Ok(()) + }); + let conn_res = match peer.connection_timeout() { + Some(t) => pingora_timeout::timeout(t, connect_future) + .await + .explain_err(ConnectTimedout, |_| { + format!("timeout {t:?} connecting to server {peer}") + })?, + None => connect_future.await, + }; + match conn_res { + Ok(socket) => { + debug!("connected to new server: {}", peer.address()); + Ok(socket.into()) + } + Err(e) => { + let c = format!("Fail to connect to {peer}"); + match e.etype() { + SocketError | BindError => Error::e_because(InternalError, c, e), + _ => Err(e.more_context(c)), + } + } + } + } + #[cfg(unix)] + SocketAddr::Unix(addr) => { + let connect_future = connect_uds( + addr.as_pathname() + .expect("non-pathname unix sockets not supported as peer"), + ); + let conn_res = match peer.connection_timeout() { + Some(t) => pingora_timeout::timeout(t, connect_future) + .await + .explain_err(ConnectTimedout, |_| { + format!("timeout {t:?} connecting to server {peer}") + })?, + None => connect_future.await, + }; + match conn_res { + Ok(socket) => { + debug!("connected to new server: {}", peer.address()); + Ok(socket.into()) + } + Err(e) => { + let c = format!("Fail to connect to {peer}"); + match e.etype() { + SocketError | BindError => Error::e_because(InternalError, c, e), + _ => Err(e.more_context(c)), + } + } + } + } + } +} + +async fn inner_udp_connect

(peer: &P, bind_to: &Option, peer_addr: &SocketAddr) -> Result +where + P: Peer + Send + Sync +{ + match peer_addr { + SocketAddr::Inet(addr) => { + let connect_future = udp_connect(addr, bind_to.as_ref(), |socket| { + #[cfg(unix)] + let raw = socket.as_raw_fd(); + #[cfg(windows)] + let raw = socket.as_raw_socket(); + + if let Some(dscp) = peer.dscp() { + debug!("Setting dscp"); + set_dscp(raw, dscp)?; + } + Ok(()) + }); + let conn_res = match peer.connection_timeout() { + Some(t) => pingora_timeout::timeout(t, connect_future) + .await + .explain_err(ConnectTimedout, |_| { + format!("timeout {t:?} connecting to server {peer}") + })?, + None => connect_future.await, + }; + let socket = match conn_res { + Ok(socket) => { + debug!("connected to new server: {}", peer.address()); + Ok(socket.into()) + } + Err(e) => { + let c = format!("Fail to connect to {peer}"); + match e.etype() { + SocketError | BindError => Error::e_because(InternalError, c, e), + _ => Err(e.more_context(c)), + } + } + }?; + + let mut quic_http3_configs = None; + if let Some(peer_options) = peer.get_peer_options() { + quic_http3_configs = peer_options.quic_http3_configs.clone() + }; + + Ok(Connection::initiate(socket, quic_http3_configs)?.into()) + } + SocketAddr::Unix(_addr) => { + // NOTE: tokio::net::UnixDatagram support could be an option + // send_to(), recv_from() are using a file path with UnixDatagram + // needs verification if Quic/quiche can handle paths as SocketAddr + // in send() & recv() + Err(Error::explain( + BindError, "Unix Sockets for HTTP3 are not implemented")) + } + } +} + pub(crate) fn bind_to_random( peer: &P, v4_list: &[InetSocketAddr], diff --git a/pingora-core/src/connectors/mod.rs b/pingora-core/src/connectors/mod.rs index a9137ca81..287c4696f 100644 --- a/pingora-core/src/connectors/mod.rs +++ b/pingora-core/src/connectors/mod.rs @@ -25,7 +25,7 @@ mod tls; use crate::tls::connectors as tls; use crate::protocols::tls::quic::client::handshake as quic_handshake; -use 
crate::protocols::{ConnectionState, Stream}; +use crate::protocols::Stream; use crate::server::configuration::ServerConf; use crate::upstreams::peer::{Peer, ALPN}; @@ -326,15 +326,21 @@ async fn do_connect_inner( ) -> Result { let stream = l4_connect(peer, bind_to).await?; if peer.tls() { - let tls_stream = tls::connect(stream, peer, alpn_override, tls_ctx).await?; - Ok(Box::new(tls_stream)) - } else if stream.is_quic_connection() { - // TODO: use tls_ctx with boringssl & quiche - // currently tls_ctx is already built, but quiche only provides a Config::from_boring() - // accepting a SslContextBuilder, but calling only .build() on it, likely a SslContext - // should be possible when modifying quiche - let quic_stream = quic_handshake(stream, peer, alpn_override, tls_ctx).await?; - Ok(Box::new(quic_stream)) + if peer.udp_http3() { + if !peer.tls() { + return Err(Error::explain( + HandshakeError, "usage of HTTP3 requires enabled TLS for the peer")) + } + // TODO: use tls_ctx with boringssl & quiche + // tls_ctx is already built, but quiche only provides a Config::from_boring() + // accepting a SslContextBuilder, but internally calling only .build() on it, + // likely a SslContext it should be possible to adapt quiche to accept a SslConnector + let quic_stream = quic_handshake(stream, peer, alpn_override, tls_ctx).await?; + Ok(Box::new(quic_stream)) + } else { + let tls_stream = tls::connect(stream, peer, alpn_override, tls_ctx).await?; + Ok(Box::new(tls_stream)) + } } else { Ok(Box::new(stream)) } @@ -558,7 +564,7 @@ pub(crate) mod quic_tests { use std::thread; use std::thread::JoinHandle; use crate::apps::http_app::ServeHttp; - use crate::listeners::{Listeners, ALPN}; + use crate::listeners::Listeners; use crate::protocols::http::ServerSession; use crate::protocols::l4::quic::{QuicHttp3Configs, MAX_IPV6_BUF_SIZE}; use crate::server::Server; @@ -602,10 +608,10 @@ pub(crate) mod quic_tests { let mut peer = HttpPeer::new( format!("127.0.0.1:{port}"), - false, + true, 
"openrusty.org".to_string(), ); - peer.options.alpn = ALPN::H3; + peer.options.set_http_version(3, 3); info!("Startup completed.."); Ok((server_handle, peer)) diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs index fd0f3b5c9..4015efafb 100644 --- a/pingora-core/src/protocols/http/v3/client.rs +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -100,8 +100,7 @@ impl Http3Session { } let headers = request_headers_to_event(&req)?; - let stream_id = self.send_request(&headers, false).await?; - error!("stream_id {}", stream_id); + self.send_request(&headers, false).await?; self.request_header_written = Some(req); Ok(()) diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index d6916e552..df50dcd64 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -490,7 +490,6 @@ impl H3Session { return Ok(None); } - // FIXME: this is wrong, required to wait for data event first? data_finished_event(self.stream_id, &mut self.event_rx).await?; self.read_ended = true; diff --git a/pingora-core/src/protocols/tls/quic/client.rs b/pingora-core/src/protocols/tls/quic/client.rs index 770638c2f..98a83744d 100644 --- a/pingora-core/src/protocols/tls/quic/client.rs +++ b/pingora-core/src/protocols/tls/quic/client.rs @@ -60,8 +60,8 @@ where pub(crate) async fn handshake_outgoing

( state: &mut OutgoingHandshakeState, peer: &P, - alpn_override: Option, - tls_ctx: &SslConnector, + _alpn_override: Option, // potentially HTTP09 could be supported + _tls_ctx: &SslConnector, // currently the SslConnector cannot be used with quiche, might be feasible ) -> pingora_error::Result where P: Peer + Send + Sync, diff --git a/pingora-core/src/upstreams/peer.rs b/pingora-core/src/upstreams/peer.rs index 56899067a..2e9790faf 100644 --- a/pingora-core/src/upstreams/peer.rs +++ b/pingora-core/src/upstreams/peer.rs @@ -205,16 +205,15 @@ pub trait Peer: Display + Clone { None } - fn ip_proto(&self) -> IpProto { - IpProto::TCP + fn udp_http3(&self) -> bool { + let mut udp_http3 = false; + if let Some(alpn) = self.get_alpn() { + udp_http3 = matches!(alpn, &ALPN::H3) + } + udp_http3 } } -pub enum IpProto { - TCP, - UDP, -} - /// A simple TCP or TLS peer without many complicated settings. #[derive(Debug, Clone)] pub struct BasicPeer { @@ -325,6 +324,7 @@ pub struct PeerOptions { pub verify_hostname: bool, /* accept the cert if it's CN matches the SNI or this name */ pub alternative_cn: Option, + /// to enable HTTP3 the ALPN needs to be ALPN::H3 pub alpn: ALPN, pub ca: Option>, pub tcp_keepalive: Option, @@ -336,7 +336,7 @@ pub struct PeerOptions { // how many concurrent h3 stream are allowed in the same connection pub max_h3_streams: usize, // quic and http3 configs (quiche) - pub quic_http3_config: Option, + pub quic_http3_configs: Option, pub extra_proxy_headers: BTreeMap>, // The list of curve the tls connection should advertise // if `None`, the default curves will be used @@ -372,7 +372,7 @@ impl PeerOptions { h2_ping_interval: None, max_h2_streams: 1, max_h3_streams: 1, - quic_http3_config: None, + quic_http3_configs: None, extra_proxy_headers: BTreeMap::new(), curves: None, second_keyshare: true, // default true and noop when not using PQ curves @@ -383,6 +383,7 @@ impl PeerOptions { } /// Set the ALPN according to the `max` and `min` constrains. 
+ /// HTTP3 is only supported when setting min & max to 3 which corresponds to an `ALPN::H3` pub fn set_http_version(&mut self, max: u8, min: u8) { self.alpn = ALPN::new(max, min); } @@ -602,16 +603,6 @@ impl Peer for HttpPeer { fn get_tracer(&self) -> Option { self.options.tracer.clone() } - - fn ip_proto(&self) -> IpProto { - if let Some(peer_options) = self.get_peer_options() { - match peer_options.alpn { - ALPN::H3 => return IpProto::UDP, - _ => {} - } - } - IpProto::TCP - } } /// The proxy settings to connect to the remote server, CONNECT only for now From 8b7d9138364323878025f0e2674c3d16554749d1 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Fri, 24 Jan 2025 08:56:45 +0100 Subject: [PATCH 33/52] add connector tests, release streams on H3 session drop --- pingora-core/src/connectors/http/v3.rs | 86 ++++++++++++++++++-- pingora-core/src/protocols/http/v3/client.rs | 3 +- 2 files changed, 83 insertions(+), 6 deletions(-) diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index 40e3bcfed..6c0f18917 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -81,7 +81,11 @@ impl ConnectionRef { } fn is_idle(&self) -> bool { - self.0.current_streams.load(Ordering::SeqCst) == 0 + self.0.current_streams.load(Ordering::Relaxed) == 0 + } + + pub(crate) fn release_stream(&self) { + self.0.current_streams.fetch_sub(1, Ordering::SeqCst); } } @@ -398,20 +402,19 @@ async fn handshake( #[cfg(test)] mod tests { + use super::*; use bytes::Bytes; use http::Version; use zstd::zstd_safe::WriteBuf; - use crate::connectors::quic_tests::quic_listener_peer; use pingora_error::Result; use pingora_http::RequestHeader; - use super::*; + use crate::connectors::quic_tests::quic_listener_peer; use crate::upstreams::peer::HttpPeer; #[tokio::test] - async fn test_connector_quic_http3() -> Result<()> { + async fn test_listener_connector_quic_http3() -> Result<()> { let (_server_handle, peer) = 
quic_listener_peer()?; - let connector = Connector::new(None); let mut session = connector.new_http_session(&peer).await?; @@ -452,4 +455,77 @@ mod tests { HttpSession::H3(_h3_session) => assert!(true), } } + + #[tokio::test] + #[cfg(feature = "any_tls")] + async fn test_h3_single_stream() { + let connector = Connector::new(None); + let mut peer = HttpPeer::new(("1.1.1.1", 443), true, "one.one.one.one".into()); + peer.options.set_http_version(3, 3); + peer.options.max_h3_streams = 1; + let h3 = connector.new_http_session(&peer).await.unwrap(); + let h3_1 = match h3 { + HttpSession::H3(h3_stream) => h3_stream, + _ => panic!("expect h3"), + }; + + let id = h3_1.conn().id(); + + assert!(connector + .reused_http_session(&peer) + .await + .unwrap() + .is_none()); + + connector.release_http_session(h3_1, &peer, None); + + let h3_2 = connector.reused_http_session(&peer).await.unwrap().unwrap(); + assert_eq!(id, h3_2.conn().id()); + + connector.release_http_session(h3_2, &peer, None); + + let h3_3 = connector.reused_http_session(&peer).await.unwrap().unwrap(); + assert_eq!(id, h3_3.conn().id()); + } + + #[tokio::test] + #[cfg(feature = "any_tls")] + async fn test_h3_multiple_stream() { + let connector = Connector::new(None); + let mut peer = HttpPeer::new(("1.1.1.1", 443), true, "one.one.one.one".into()); + peer.options.set_http_version(3, 3); + peer.options.max_h3_streams = 3; + let h3 = connector.new_http_session(&peer).await.unwrap(); + let h3_1 = match h3 { + HttpSession::H3(h3_stream) => h3_stream, + _ => panic!("expect h3"), + }; + + let id = h3_1.conn().id(); + + let h3_2 = connector.reused_http_session(&peer).await.unwrap().unwrap(); + assert_eq!(id, h3_2.conn().id()); + let h3_3 = connector.reused_http_session(&peer).await.unwrap().unwrap(); + assert_eq!(id, h3_3.conn().id()); + + // max stream is 3 for now + assert!(connector + .reused_http_session(&peer) + .await + .unwrap() + .is_none()); + + connector.release_http_session(h3_1, &peer, None); + + let h3_4 = 
connector.reused_http_session(&peer).await.unwrap().unwrap(); + assert_eq!(id, h3_4.conn().id()); + + connector.release_http_session(h3_2, &peer, None); + connector.release_http_session(h3_3, &peer, None); + connector.release_http_session(h3_4, &peer, None); + + // all streams are released, now the connection is idle + let h3_5 = connector.reused_http_session(&peer).await.unwrap().unwrap(); + assert_eq!(id, h3_5.conn().id()); + } } \ No newline at end of file diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs index 4015efafb..aee2c7b63 100644 --- a/pingora-core/src/protocols/http/v3/client.rs +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -62,12 +62,13 @@ impl Http3Session { impl Drop for Http3Session { fn drop(&mut self) { + // TODO: clarify if a RESET_STREAM should be sent if let Some(stream_id) = self.stream_id { self.conn.drop_session(stream_id); debug!("connection {:?} dropping session with stream id {}", self.conn.conn_id(), stream_id) } - // TODO: clarify if a RESET_STREAM should be sent + self.conn.release_stream(); } } From 6c492e6ba082623208e5e58cc7394fe4e2a9d8b6 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Fri, 24 Jan 2025 09:14:09 +0100 Subject: [PATCH 34/52] consolidate server/client functionality on several stream related calls through ConnectionIo capacity, send_body, read_body, finish_send, shutdown fixes read_body: use Event::Finished on read limit body receive max size chunked reads fixes ConnectionTx: update max_udp_dgram_size before sending --- pingora-core/src/connectors/http/mod.rs | 9 +- pingora-core/src/connectors/http/v3.rs | 110 ++++--- pingora-core/src/connectors/l4.rs | 65 +++-- pingora-core/src/connectors/mod.rs | 20 +- pingora-core/src/protocols/http/v3/client.rs | 267 +++++++---------- pingora-core/src/protocols/http/v3/mod.rs | 273 ++++++++++++++---- pingora-core/src/protocols/http/v3/server.rs | 222 +++++--------- .../src/protocols/l4/quic/connector.rs | 4 +- 
.../src/protocols/l4/quic/listener.rs | 13 +- pingora-core/src/protocols/l4/quic/mod.rs | 44 ++- pingora-core/src/protocols/l4/stream.rs | 2 +- pingora-core/src/protocols/tls/quic/client.rs | 7 +- pingora-core/src/protocols/tls/quic/server.rs | 3 +- pingora-core/src/upstreams/peer.rs | 2 +- 14 files changed, 530 insertions(+), 511 deletions(-) diff --git a/pingora-core/src/connectors/http/mod.rs b/pingora-core/src/connectors/http/mod.rs index 450e0b18a..a92f2a644 100644 --- a/pingora-core/src/connectors/http/mod.rs +++ b/pingora-core/src/connectors/http/mod.rs @@ -14,15 +14,15 @@ //! Connecting to HTTP servers -use std::collections::HashMap; use crate::connectors::ConnectorOptions; use crate::protocols::http::client::HttpSession; +use crate::protocols::{UniqueID, UniqueIDType}; use crate::upstreams::peer::Peer; -use pingora_error::Result; -use std::time::Duration; use parking_lot::RwLock; +use pingora_error::Result; use pingora_pool::PoolNode; -use crate::protocols::{UniqueID, UniqueIDType}; +use std::collections::HashMap; +use std::time::Duration; pub mod v1; pub mod v2; @@ -108,7 +108,6 @@ impl Connector { } } - // TODO: also use in v2, currently only used in v3 pub(crate) struct InUsePool { // TODO: use pingora hashmap to shard the lock contention diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index 6c0f18917..21f566c5d 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -3,6 +3,7 @@ use super::HttpSession; use crate::connectors::http::InUsePool; use crate::connectors::{ConnectorOptions, TransportConnector}; use crate::protocols::http::v3::client::{Http3Poll, Http3Session}; +use crate::protocols::http::v3::ConnectionIo; use crate::protocols::l4::quic::{Connection, Crypto}; use crate::protocols::{Digest, Stream, UniqueID, UniqueIDType}; use crate::upstreams::peer::{Peer, ALPN}; @@ -17,7 +18,7 @@ use std::collections::VecDeque; use std::sync::atomic::{AtomicUsize, 
Ordering}; use std::sync::Arc; use std::time::Duration; -use tokio::sync::{watch, mpsc, Notify}; +use tokio::sync::{mpsc, watch}; use tokio::task::JoinHandle; // FIXME: ConnectorOptions contains CA file path from ServerConfig @@ -25,12 +26,16 @@ use tokio::task::JoinHandle; pub(crate) struct ConnectionRef(Arc); impl ConnectionRef { - pub fn new(l4_stream: Stream, conn_io: ConnectionIo, digest: Digest, - add_sessions: Arc)>>>, - drop_sessions: Arc>>, - idle_close: watch::Receiver, - max_streams: usize, h3poll_task: JoinHandle>, ) -> Self { - + pub fn new( + l4_stream: Stream, + conn_io: ConnectionIo, + digest: Digest, + add_sessions: Arc)>>>, + drop_sessions: Arc>>, + idle_close: watch::Receiver, + max_streams: usize, + h3poll_task: JoinHandle>, + ) -> Self { Self(Arc::new(ConnectionRefInner { l4_stream, conn_io, @@ -87,6 +92,10 @@ impl ConnectionRef { pub(crate) fn release_stream(&self) { self.0.current_streams.fetch_sub(1, Ordering::SeqCst); } + + pub fn digest_mut(&mut self) -> Option<&mut Digest> { + Arc::get_mut(&mut self.0).map(|inner| &mut inner.digest) + } } pub(crate) struct ConnectionRefInner { @@ -115,14 +124,17 @@ pub(crate) struct ConnectionRefInner { // watch for idle pool timeouts idle_close: watch::Receiver, - h3poll_task: JoinHandle> + h3poll_task: JoinHandle>, } impl Drop for ConnectionRefInner { fn drop(&mut self) { if !self.h3poll_task.is_finished() { self.h3poll_task.abort(); - debug!("connection {:?} stopped H3Poll task", self.conn_io.conn_id) + debug!( + "connection {:?} stopped Http3Poll task", + self.conn_io.conn_id + ) } } } @@ -133,25 +145,6 @@ impl UniqueID for ConnectionRef { } } -#[derive(Clone)] -pub(crate) struct ConnectionIo { - pub(crate) conn_id: ConnectionId<'static>, - - pub(crate) quic: Arc>, - pub(crate) http3: Arc>, - - pub(crate) rx_notify: Arc, - pub(crate) tx_notify: Arc, -} - - -impl ConnectionIo { - fn is_shutting_down(&self) -> bool { - let qconn = self.quic.lock(); - qconn.is_draining() - } -} - /// Http3 connector pub 
struct Connector { // for creating connections, the Stream for h3 should be reused @@ -284,7 +277,7 @@ impl Connector { // check alpn match stream.selected_alpn_proto() { Some(ALPN::H3) => { /* continue */ } - _ => return Err(Error::explain(InternalError, "peer ALPN is not H3")) + _ => return Err(Error::explain(InternalError, "peer ALPN is not H3")), } let max_h3_stream = peer.get_peer_options().map_or(1, |o| o.max_h3_streams); @@ -321,17 +314,14 @@ impl ConnectionRef { pub fn more_streams_allowed(&self) -> bool { let qconn = self.0.conn_io.quic.lock(); - qconn.is_established() && - !qconn.is_closed() && - !qconn.is_draining() && - qconn.peer_streams_left_bidi() > 0 + qconn.is_established() + && !qconn.is_closed() + && !qconn.is_draining() + && qconn.peer_streams_left_bidi() > 0 } } -async fn handshake( - mut stream: Stream, - max_streams: usize -) -> Result { +async fn handshake(mut stream: Stream, max_streams: usize) -> Result { // Safe guard: new_http_session() assumes there should be at least one free stream if max_streams == 0 { return Error::e_explain(H3Error, "zero max_stream configured"); @@ -347,13 +337,13 @@ async fn handshake( socket_digest: stream.get_socket_digest(), }; let Some(quic_state) = stream.quic_connection_state() else { - return Err(Error::explain(InternalError, "stream is not a Quic stream")) + return Err(Error::explain(InternalError, "stream is not a Quic stream")); }; - let conn_io = match quic_state { - Connection::IncomingHandshake(_) | - Connection::IncomingEstablished(_) | - Connection::OutgoingHandshake(_) => { + let conn_io = match quic_state { + Connection::IncomingHandshake(_) + | Connection::IncomingEstablished(_) + | Connection::OutgoingHandshake(_) => { return Err(Error::explain(InternalError, "invalid Quic stream state")) } Connection::OutgoingEstablished(e_state) => { @@ -362,6 +352,7 @@ async fn handshake( quiche::h3::Connection::with_transport(&mut conn, &e_state.http3_config) .explain_err(HandshakeError, |_| "during H3 
handshake") }?; + e_state.tx_notify.notify_waiters(); ConnectionIo { conn_id: e_state.connection_id.clone(), @@ -374,7 +365,6 @@ async fn handshake( }; debug!("H3 handshake to server done."); - let add_sessions = Arc::new(Mutex::new(VecDeque::default())); let drop_sessions = Arc::new(Mutex::new(VecDeque::default())); let (idle_close_tx, idle_close_rx) = watch::channel::(false); @@ -384,7 +374,7 @@ async fn handshake( sessions: Default::default(), add_sessions: add_sessions.clone(), drop_sessions: drop_sessions.clone(), - idle_close: idle_close_tx + idle_close: idle_close_tx, }; let h3poll_task = pingora_runtime::current_handle().spawn(h3poll.start()); @@ -396,20 +386,20 @@ async fn handshake( drop_sessions, idle_close_rx, max_streams, - h3poll_task + h3poll_task, )) } #[cfg(test)] mod tests { use super::*; - use bytes::Bytes; + use crate::connectors::quic_tests::quic_listener_peer; + use crate::protocols::l4::quic::MAX_IPV6_QUIC_DATAGRAM_SIZE; + use crate::upstreams::peer::HttpPeer; + use bytes::{BufMut, BytesMut}; use http::Version; - use zstd::zstd_safe::WriteBuf; use pingora_error::Result; use pingora_http::RequestHeader; - use crate::connectors::quic_tests::quic_listener_peer; - use crate::upstreams::peer::HttpPeer; #[tokio::test] async fn test_listener_connector_quic_http3() -> Result<()> { @@ -421,25 +411,31 @@ mod tests { let mut req = RequestHeader::build("GET", b"/", Some(3))?; req.insert_header(http::header::HOST, "openresty.org")?; + let body_base = "hello world\n"; + let body_string = body_base.repeat(MAX_IPV6_QUIC_DATAGRAM_SIZE * 128 / body_base.len()); + let mut body_send = BytesMut::new(); + body_send.put(body_string.as_bytes()); + session.write_request_header(Box::new(req)).await?; - session.write_request_body(Bytes::from(b"hello world".as_slice()), false).await?; + session + .write_request_body(body_send.freeze(), false) + .await?; session.finish_request_body().await?; session.read_response_header().await?; - let resp_body = 
session.read_response_body().await?; let resp = session.response_header(); - assert!(resp.is_some()); if let Some(resp) = resp { assert_eq!(resp.status.as_str(), "200"); assert_eq!(resp.version, Version::HTTP_3); } - assert!(resp_body.is_some()); - if let Some(resp_body) = resp_body { - assert_eq!(resp_body.as_slice(), b"hello world".as_slice()) + let mut resp_body = BytesMut::new(); + while let Some(body) = session.read_response_body().await? { + assert!(body.len() < MAX_IPV6_QUIC_DATAGRAM_SIZE * 64); + resp_body.put(body) } - + assert_eq!(resp_body.as_ref(), body_string.as_bytes()); Ok(()) } @@ -528,4 +524,4 @@ mod tests { let h3_5 = connector.reused_http_session(&peer).await.unwrap().unwrap(); assert_eq!(id, h3_5.conn().id()); } -} \ No newline at end of file +} diff --git a/pingora-core/src/connectors/l4.rs b/pingora-core/src/connectors/l4.rs index ca1ca2cb2..d9cb17ab4 100644 --- a/pingora-core/src/connectors/l4.rs +++ b/pingora-core/src/connectors/l4.rs @@ -12,15 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use async_trait::async_trait; -use log::debug; -use pingora_error::{Context, Error, ErrorType::*, OrErr, Result}; -use rand::seq::SliceRandom; -use std::net::SocketAddr as InetSocketAddr; -#[cfg(unix)] -use std::os::unix::io::AsRawFd; -#[cfg(windows)] -use std::os::windows::io::AsRawSocket; #[cfg(unix)] use crate::protocols::l4::ext::connect_uds; use crate::protocols::l4::ext::{ @@ -32,6 +23,15 @@ use crate::protocols::l4::socket::SocketAddr; use crate::protocols::l4::stream::Stream; use crate::protocols::{GetSocketDigest, SocketDigest}; use crate::upstreams::peer::Peer; +use async_trait::async_trait; +use log::debug; +use pingora_error::{Context, Error, ErrorType::*, OrErr, Result}; +use rand::seq::SliceRandom; +use std::net::SocketAddr as InetSocketAddr; +#[cfg(unix)] +use std::os::unix::io::AsRawFd; +#[cfg(windows)] +use std::os::windows::io::AsRawSocket; /// The interface to establish a L4 connection #[async_trait] @@ -102,19 +102,18 @@ where } let peer_addr = peer.address(); - let mut stream: Stream = if let Some(custom_l4) = - peer.get_peer_options().and_then(|o| o.custom_l4.as_ref()) - { - custom_l4.connect(peer_addr).await? - } else { - if peer.udp_http3() { - // create UDP sockets - inner_udp_connect(peer, &bind_to, peer_addr).await? + let mut stream: Stream = + if let Some(custom_l4) = peer.get_peer_options().and_then(|o| o.custom_l4.as_ref()) { + custom_l4.connect(peer_addr).await? } else { - // create TCP sockets - inner_tcp_connect(peer, bind_to, peer_addr).await? - } - }; + if peer.udp_http3() { + // create UDP sockets + inner_udp_connect(peer, &bind_to, peer_addr).await? + } else { + // create TCP sockets + inner_tcp_connect(peer, bind_to, peer_addr).await? + } + }; let tracer = peer.get_tracer(); if let Some(t) = tracer { @@ -141,9 +140,13 @@ where Ok(stream) } -async fn inner_tcp_connect

(peer: &P, bind_to: Option, peer_addr: &SocketAddr) -> Result +async fn inner_tcp_connect

( + peer: &P, + bind_to: Option, + peer_addr: &SocketAddr, +) -> Result where - P: Peer + Send + Sync + P: Peer + Send + Sync, { match peer_addr { SocketAddr::Inet(addr) => { @@ -219,9 +222,13 @@ where } } -async fn inner_udp_connect

(peer: &P, bind_to: &Option, peer_addr: &SocketAddr) -> Result +async fn inner_udp_connect

( + peer: &P, + bind_to: &Option, + peer_addr: &SocketAddr, +) -> Result where - P: Peer + Send + Sync + P: Peer + Send + Sync, { match peer_addr { SocketAddr::Inet(addr) => { @@ -272,7 +279,9 @@ where // needs verification if Quic/quiche can handle paths as SocketAddr // in send() & recv() Err(Error::explain( - BindError, "Unix Sockets for HTTP3 are not implemented")) + BindError, + "Unix Sockets for HTTP3 are not implemented", + )) } } } @@ -662,10 +671,10 @@ mod tests { #[cfg(test)] mod quic_tests { use crate::connectors::l4::connect; + use crate::connectors::quic_tests::quic_listener_peer; + use crate::connectors::{do_connect, tls}; use crate::protocols::l4::quic::Connection; use crate::protocols::ConnectionState; - use crate::connectors::{do_connect, tls}; - use crate::connectors::quic_tests::quic_listener_peer; use pingora_error::Result; #[tokio::test] diff --git a/pingora-core/src/connectors/mod.rs b/pingora-core/src/connectors/mod.rs index 287c4696f..669130001 100644 --- a/pingora-core/src/connectors/mod.rs +++ b/pingora-core/src/connectors/mod.rs @@ -329,7 +329,9 @@ async fn do_connect_inner( if peer.udp_http3() { if !peer.tls() { return Err(Error::explain( - HandshakeError, "usage of HTTP3 requires enabled TLS for the peer")) + HandshakeError, + "usage of HTTP3 requires enabled TLS for the peer", + )); } // TODO: use tls_ctx with boringssl & quiche // tls_ctx is already built, but quiche only provides a Config::from_boring() @@ -561,10 +563,9 @@ mod tests { #[cfg(test)] pub(crate) mod quic_tests { - use std::thread; - use std::thread::JoinHandle; use crate::apps::http_app::ServeHttp; use crate::listeners::Listeners; + use crate::prelude::HttpPeer; use crate::protocols::http::ServerSession; use crate::protocols::l4::quic::{QuicHttp3Configs, MAX_IPV6_BUF_SIZE}; use crate::server::Server; @@ -572,11 +573,12 @@ pub(crate) mod quic_tests { use async_trait::async_trait; use bytes::{BufMut, BytesMut}; use http::{Response, StatusCode}; - use std::time::Duration; 
use log::info; - use pingora_timeout::timeout; use pingora_error::Result; - use crate::prelude::HttpPeer; + use pingora_timeout::timeout; + use std::thread; + use std::thread::JoinHandle; + use std::time::Duration; pub(crate) fn quic_listener_peer() -> Result<(JoinHandle<()>, HttpPeer)> { let port = 6147u16; @@ -622,8 +624,6 @@ pub(crate) mod quic_tests { #[async_trait] impl ServeHttp for EchoApp { async fn response(&self, http_stream: &mut ServerSession) -> Response> { - // read timeout of 2s - let read_timeout = 2000; let body_future = async { let mut body = BytesMut::with_capacity(MAX_IPV6_BUF_SIZE); while let Ok(b) = http_stream.read_request_body().await { @@ -638,6 +638,8 @@ pub(crate) mod quic_tests { body.freeze() }; + // read timeout of 2s + let read_timeout = 2000; let body = match timeout(Duration::from_millis(read_timeout), body_future).await { Ok(res) => res, Err(_) => { @@ -653,4 +655,4 @@ pub(crate) mod quic_tests { .unwrap() } } -} \ No newline at end of file +} diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs index aee2c7b63..4dfc876b0 100644 --- a/pingora-core/src/protocols/http/v3/client.rs +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -1,26 +1,26 @@ -use std::cmp; -use std::collections::VecDeque; -use std::fmt::Debug; -use std::sync::Arc; -use crate::connectors::http::v3::{ConnectionIo, ConnectionRef}; +use crate::connectors::http::v3::ConnectionRef; +use crate::protocols::http::v3::nohash::StreamIdHashMap; +use crate::protocols::http::v3::{ + data_finished_event, event_to_response_headers, headervec_to_headermap, + request_headers_to_event, ConnectionIo, H3_SESSION_EVENTS_CHANNEL_SIZE, +}; use crate::protocols::l4::socket::SocketAddr; use crate::protocols::{Digest, UniqueID, UniqueIDType}; -use bytes::{BufMut, Bytes, BytesMut}; +use bytes::Bytes; use http::HeaderMap; -use pingora_http::{RequestHeader, ResponseHeader}; -use std::time::Duration; -use log::{debug, error, trace, warn}; 
+use log::{debug, trace, warn}; use parking_lot::Mutex; -use quiche::{h3, Shutdown}; +use pingora_error::ErrorType::{H3Error, InternalError, InvalidHTTPHeader, ReadError, WriteError}; +use pingora_error::{Error, ErrorType, OrErr, Result}; +use pingora_http::{RequestHeader, ResponseHeader}; +use quiche::h3; use quiche::h3::{Event, Header, NameValue}; -use tokio::sync::{mpsc, watch}; +use std::collections::VecDeque; +use std::fmt::Debug; +use std::sync::Arc; +use std::time::Duration; use tokio::sync::mpsc::{Receiver, Sender}; -use pingora_error::{Error, ErrorType, OrErr, Result}; -use pingora_error::ErrorType::{H3Error, InternalError, InvalidHTTPHeader, ReadError, WriteError}; -use crate::protocols::http::v3::{data_finished_event, event_to_response_headers, headervec_to_headermap, request_headers_to_event, stream_capacity, H3_SESSION_EVENTS_CHANNEL_SIZE}; -use crate::protocols::http::v3::nohash::StreamIdHashMap; -use crate::protocols::l4::quic::MAX_IPV6_QUIC_DATAGRAM_SIZE; - +use tokio::sync::{mpsc, watch}; pub struct Http3Session { conn: ConnectionRef, @@ -44,12 +44,14 @@ pub struct Http3Session { // sent body bytes body_sent: usize, - // send is finished (Quic finished frame sent) + // sending body is finished (Quic stream FIN flag sent) send_ended: bool, // body bytes read body_read: usize, - // read is finished (Quic finished frame received) + // continue reading without waiting for new event + read_continue: bool, + // reading body is finished (Quic stream FIN flag received) read_ended: bool, } @@ -59,14 +61,16 @@ impl Http3Session { } } - impl Drop for Http3Session { fn drop(&mut self) { // TODO: clarify if a RESET_STREAM should be sent if let Some(stream_id) = self.stream_id { self.conn.drop_session(stream_id); - debug!("connection {:?} dropping session with stream id {}", - self.conn.conn_id(), stream_id) + debug!( + "connection {:?} dropping session with stream id {}", + self.conn.conn_id(), + stream_id + ) } self.conn.release_stream(); } @@ -85,15 +89,13 @@ 
impl Http3Session { body_sent: 0, send_ended: false, body_read: 0, + read_continue: false, read_ended: false, }) } /// Write the request header to the server - pub async fn write_request_header( - &mut self, - req: Box - ) -> Result<()> { + pub async fn write_request_header(&mut self, req: Box) -> Result<()> { if self.request_header_written.is_some() { // cannot send again warn!("request not sent as session already sent a request"); @@ -107,14 +109,19 @@ impl Http3Session { Ok(()) } - async fn send_request(&mut self, headers: &[T], fin: bool) -> Result { + async fn send_request( + &mut self, + headers: &[T], + fin: bool, + ) -> Result { // sending the request creates the underlying quic stream & according stream id // it is not possible to check the stream capacity before sending the request let stream_id = { let mut qconn = self.conn_io().quic.lock(); let mut hconn = self.conn_io().http3.lock(); - hconn.send_request(&mut qconn, headers, fin) + hconn + .send_request(&mut qconn, headers, fin) .explain_err(WriteError, |_| "failed to send http3 request headers")? 
}; @@ -126,8 +133,6 @@ impl Http3Session { Ok(stream_id) } - // TODO: potentially refactor/unify with server side - /// Write a request body chunk pub async fn write_request_body(&mut self, data: Bytes, end: bool) -> Result<()> { if self.send_ended { @@ -140,58 +145,17 @@ impl Http3Session { "trying to send the request body before request header being sent", )); }; - let Some(stream_id) = self.stream_id else { - return Err(Error::explain(H3Error, "stream id not present")); - }; - let mut sent_len = 0; - let mut fin = end; - while sent_len < data.len() { - let required = cmp::min(data.len() - sent_len, MAX_IPV6_QUIC_DATAGRAM_SIZE); - let capacity = stream_capacity(&self.conn_io().quic, stream_id, required, - &self.conn_io().rx_notify, &self.conn_io().tx_notify).await?; - - let send = if capacity > data.len() - sent_len { - &data[sent_len..data.len()] - } else { - &data[sent_len..sent_len + capacity] - }; - - fin = sent_len + send.len() == data.len() && end; - match self.send_body(send, fin) { - Ok(sent_size) => { - debug_assert_eq!(sent_size, send.len()); - sent_len += sent_size; - } - Err(e) => { - return Err(e).explain_err(WriteError, |_| { - "writing h3 request body to downstream" - }) - } - } - } - debug_assert_eq!(fin, end); - debug_assert_eq!(sent_len, data.len()); - if end { - self.conn_io().tx_notify.notify_waiters(); - } + let sent_len = self + .conn_io() + .send_body(self.stream_id()?, &data, end) + .await?; self.body_sent += sent_len; self.send_ended = self.send_ended || end; Ok(()) } - - // TODO: potentially refactor/unify with server side - fn send_body(&self, body: &[u8], fin: bool) -> Result { - let mut qconn = self.conn_io().quic.lock(); - let mut hconn = self.conn_io().http3.lock(); - - hconn.send_body(&mut qconn, self.stream_id()?, body, fin) - .explain_err(WriteError, |e| format!("failed to send http3 request body {:?}", e)) - } - - // TODO: potentially refactor/unify with server side /// Signal that the request body has ended pub fn 
finish_request_body(&mut self) -> Result<()> { if self.send_ended { @@ -200,12 +164,7 @@ impl Http3Session { } if self.request_header_written.is_some() { - // use an empty data frame to signal the end - self.send_body(&[], true).explain_err( - WriteError, - |e| format! {"Writing h3 request body finished to downstream failed. {e}"}, - )?; - self.conn_io().tx_notify.notify_waiters(); + self.conn_io().finish_send(self.stream_id()?)?; self.send_ended = true; } // else: the response header is not sent, do nothing now. @@ -217,7 +176,7 @@ impl Http3Session { pub async fn read_response_header(&mut self) -> Result<()> { if self.response_header.is_some() { // already received - return Ok(()) + return Ok(()); }; let (headers, _) = headers_event(self.stream_id()?, self.event_rx()?).await?; @@ -241,7 +200,6 @@ impl Http3Session { Ok(event_rx) } - // TODO: potentially refactor/unify with server side /// Read the response body /// /// `None` means, no more body to read @@ -252,9 +210,19 @@ impl Http3Session { let read_timeout = self.read_timeout.clone(); tokio::select! { - res = data_finished_event(self.stream_id()?, self.event_rx()?) => { - self.read_ended = true; - res? 
+ res = async { + if !self.read_continue { + data_finished_event(self.stream_id()?, self.event_rx()?).await + } else { + Ok(false) + } + } => { + let finished = res?; + if finished { + trace!("finished event received"); + self.read_ended = true; + return Ok(None) + } }, _timedout = async { if let Some(read_timeout) = read_timeout { @@ -267,44 +235,14 @@ impl Http3Session { } } - let mut buf = [0u8; MAX_IPV6_QUIC_DATAGRAM_SIZE]; - let size = match self.recv_body(self.stream_id()?, &mut buf) { - Ok(size) => size, - Err(h3::Error::Done) => { - trace!("recv_body done"); - return Ok(Some(BytesMut::with_capacity(0).into())); - } - Err(e) => { - return Err(Error::explain( - ReadError, - format!("reading body failed with {}", e), - )) - } - }; - - let mut data = BytesMut::with_capacity(size); - data.put_slice(&buf[..size]); - let data: Bytes = data.into(); + let (data, continue_read) = self.conn_io().read_body(self.stream_id()?)?; + self.body_read += data.len(); + self.read_continue = continue_read; - self.body_read += size; - - trace!("ready body len={:?}", data.len()); + trace!("read response body len={:?}", data.len()); Ok(Some(data)) } - // TODO: potentially refactor/unify with server side - // TODO: check if result type can be changed (requires Error::Done not being used) - fn recv_body(&self, stream_id: u64, out: &mut [u8]) -> h3::Result { - let mut qconn = self.conn_io().quic.lock(); - let mut hconn = self.conn_io().http3.lock(); - debug!( - "H3 connection {:?} stream {} receiving body", - qconn.trace_id(), stream_id - ); - hconn.recv_body(&mut qconn, stream_id, out) - } - - /// Whether the response has ended pub fn response_finished(&self) -> bool { self.read_ended @@ -327,7 +265,7 @@ impl Http3Session { pub async fn read_trailers(&mut self) -> Result> { if !self.read_ended { warn!("trying to read trailers before body finished"); - return Ok(None) + return Ok(None); }; // RFC9110 Section 6.5.1 @@ -337,7 +275,8 @@ impl Http3Session { let mut client_accepts = false; 
if let Some(headers) = &self.request_header_written { if let Some(te_header) = headers.headers.get(http::header::TE) { - let te = te_header.to_str() + let te = te_header + .to_str() .explain_err(InvalidHTTPHeader, |_| "failed to parse TE header")?; client_accepts = te.contains("trailers") @@ -350,7 +289,7 @@ impl Http3Session { }; if !(client_accepts && response_has_trailers) { - return Ok(None) + return Ok(None); } // as per RFC9114/Section 4.1 it is an optional SINGLE header frame @@ -372,7 +311,6 @@ impl Http3Session { self.response_header.as_ref() } - // TODO: potentially refactor/unify with server side /// Give up the stream abruptly. /// /// This will send a `STOP_SENDING` and a `RESET_STREAM` for the Quic stream to the client. @@ -380,30 +318,12 @@ impl Http3Session { let stream_id = match self.stream_id() { Ok(id) => id, Err(_) => { - error!("failed to shutdown session, no stream id present"); - return + warn!("failed to shutdown session, no stream id present"); + return; } }; - - if !self.read_ended { - self.stream_shutdown(stream_id, Shutdown::Read, 2u64); - // sent STOP_SENDING frame & stream_recv() will no longer return data - self.read_ended = true; - } - if !self.send_ended { - self.stream_shutdown(stream_id, Shutdown::Write, 2u64); - // sent RESET_STREAM & stream_send() data will be ignored - self.send_ended = true; - } - } - - // TODO: potentially refactor/unify with server side - fn stream_shutdown(&self, stream_id: u64, direction: Shutdown, error_code: u64) { - let mut qconn = self.conn_io().quic.lock(); - match qconn.stream_shutdown(stream_id, direction, error_code) { - Ok(()) => self.conn_io().tx_notify.notify_waiters(), - Err(e) => warn!("h3 stream {} shutdown failed. 
{:?}", stream_id, e), - } + let conn_io = self.conn_io().clone(); + conn_io.shutdown_stream(stream_id, &mut self.read_ended, &mut self.send_ended); } /// Return the [`ConnectionRef`] of the Http3Session @@ -423,7 +343,7 @@ impl Http3Session { /// /// Will return `None` if multiple H3 streams are open. pub fn digest_mut(&mut self) -> Option<&mut Digest> { - todo!("needs an arc in order to get_mut successfully") + self.conn.digest_mut() } /// Return the server (peer) address recorded in the connection digest. @@ -450,7 +370,10 @@ impl Http3Session { } } -async fn headers_event(stream_id: u64, event_rx: &mut Receiver) -> Result<(Vec

, bool)> { +async fn headers_event( + stream_id: u64, + event_rx: &mut Receiver, +) -> Result<(Vec
, bool)> { loop { match event_rx.recv().await { Some(ev) => { @@ -459,9 +382,7 @@ async fn headers_event(stream_id: u64, event_rx: &mut Receiver) -> Result Event::Finished => { debug_assert!(false, "Finished event when Headers requested"); } - Event::Headers { list, more_frames } => { - return Ok((list, more_frames)) - } + Event::Headers { list, more_frames } => return Ok((list, more_frames)), Event::Data => { debug_assert!(false, "Data event when Headers requested"); } @@ -509,7 +430,9 @@ impl Http3Poll { if qconn.is_closed() { self.idle_close.send_replace(true); break 'poll Err(Error::explain( - H3Error, format!("quic connection {:?} is closed stopping", conn_id))); + H3Error, + format!("quic connection {:?} is closed stopping", conn_id), + )); } let mut hconn = self.conn_io.http3.lock(); @@ -524,13 +447,14 @@ impl Http3Poll { // TODO: connection timeout racing self.conn_io.rx_notify.notified().await; - continue 'poll + continue 'poll; } _ => { - break 'poll Err(e).explain_err( - H3Error, |_| format!("failed to poll h3 connection {:?}" , e)) + break 'poll Err(e).explain_err(H3Error, |_| { + format!("failed to poll h3 connection {:?}", e) + }) } - } + }, }; let session = if let Some(session) = self.sessions.get_mut(&stream_id) { @@ -540,12 +464,15 @@ impl Http3Poll { let Some(session) = self.sessions.get_mut(&stream_id) else { return Err(Error::explain( InternalError, - format!("missing session channel for stream id {}", stream_id))) + format!("missing session channel for stream id {}", stream_id), + )); }; session }; - session.send(ev).await + session + .send(ev) + .await .explain_err(H3Error, |_| "failed to forward h3 event to session")? 
} } @@ -555,30 +482,40 @@ impl Http3Poll { self.add_sessions() } - fn add_sessions(&mut self) -> Result<()>{ + fn add_sessions(&mut self) -> Result<()> { let mut add_sessions = self.add_sessions.lock(); while let Some((stream_id, sender)) = add_sessions.pop_front() { if let Some(_sender) = self.sessions.insert(stream_id, sender) { debug_assert!(false, "stream id {} existed", stream_id); return Err(Error::explain( - InternalError, format!("stream id {} was already present in sessions", stream_id))) + InternalError, + format!("stream id {} was already present in sessions", stream_id), + )); } else { - debug!("connection {:?} added stream id {} to sessions", self.conn_io.conn_id, stream_id) + debug!( + "connection {:?} added stream id {} to sessions", + self.conn_io.conn_id, stream_id + ) } } Ok(()) } - fn drop_sessions(&mut self) -> Result<()>{ + fn drop_sessions(&mut self) -> Result<()> { let mut drop_sessions = self.drop_sessions.lock(); while let Some(stream_id) = drop_sessions.pop_front() { if let Some(_sender) = self.sessions.remove(&stream_id) { - debug!("connection {:?} removed stream id {} from sessions", self.conn_io.conn_id, stream_id) + debug!( + "connection {:?} removed stream id {} from sessions", + self.conn_io.conn_id, stream_id + ) } else { return Err(Error::explain( - InternalError, format!("failed to remove session with stream id {}", stream_id))) + InternalError, + format!("failed to remove session with stream id {}", stream_id), + )); } } Ok(()) } -} \ No newline at end of file +} diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index 98f7295c7..199cf623e 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -14,28 +14,207 @@ //! 
HTTP/3 implementation +use crate::protocols::l4::quic::MAX_IPV6_QUIC_DATAGRAM_SIZE; +use bytes::{BufMut, Bytes, BytesMut}; +use http::uri::{Authority, Scheme}; use http::{HeaderMap, HeaderName, HeaderValue, Request, Uri, Version}; -use log::{trace, warn}; +use log::{debug, trace, warn}; +use parking_lot::Mutex; +use pingora_error::ErrorType::{H3Error, InvalidHTTPHeader, ReadError, WriteError}; use pingora_error::{Error, ErrorType, OrErr, Result}; use pingora_http::{RequestHeader, ResponseHeader}; use quiche::h3::{Event, Header, NameValue}; +use quiche::{ConnectionId, Shutdown}; +use std::cmp; use std::fmt::Debug; use std::future::Future; use std::pin::Pin; -use http::uri::{Authority, Scheme}; -use parking_lot::Mutex; -use quiche::Connection; +use std::sync::Arc; use tokio::sync::mpsc::Receiver; use tokio::sync::Notify; -use pingora_error::ErrorType::{H3Error, InvalidHTTPHeader, ReadError}; pub const H3_SESSION_EVENTS_CHANNEL_SIZE: usize = 256; pub const H3_SESSION_DROP_DEQUE_INITIAL_CAPACITY: usize = 2048; +const MAX_PER_INVOCATION_READ_BODY_BYTES: usize = MAX_IPV6_QUIC_DATAGRAM_SIZE * 64; + pub mod client; pub mod nohash; pub mod server; +#[derive(Clone)] +pub(crate) struct ConnectionIo { + pub(crate) conn_id: ConnectionId<'static>, + + pub(crate) quic: Arc>, + pub(crate) http3: Arc>, + + // receive notification on Quic recv, used to check stream capacity + // as it only increases after MaxData or MaxStreamData frame was received + pub(crate) rx_notify: Arc, + + pub(crate) tx_notify: Arc, +} + +impl ConnectionIo { + pub(crate) fn is_shutting_down(&self) -> bool { + let qconn = self.quic.lock(); + qconn.is_draining() + } + + fn stream_capacity( + &self, + stream_id: u64, + required: usize, + ) -> Pin> + Send + '_>> { + Box::pin(async move { + let capacity; + { + let qconn = self.quic.lock(); + let conn_id = qconn.trace_id(); + capacity = qconn + .stream_capacity(stream_id) + .explain_err(WriteError, |e| { + format!( + "H3 connection {} failed to acquire capacity 
for stream {} error {:?}", + conn_id, stream_id, e + ) + })?; + } + + if capacity >= required { + Ok(capacity) + } else { + self.tx_notify.notify_waiters(); + self.rx_notify.notified().await; + self.stream_capacity(stream_id, required).await + } + }) + } + + async fn send_body(&self, stream_id: u64, data: &[u8], end: bool) -> Result { + let mut sent_len = 0; + let mut fin = end; + while sent_len < data.len() { + let required = cmp::min(data.len() - sent_len, MAX_IPV6_QUIC_DATAGRAM_SIZE); + let capacity = self.stream_capacity(stream_id, required).await?; + + let send = if capacity > data.len() - sent_len { + &data[sent_len..data.len()] + } else { + &data[sent_len..sent_len + capacity] + }; + + fin = sent_len + send.len() == data.len() && end; + match self.send_body_conn(stream_id, send, fin) { + Ok(sent_size) => { + sent_len += sent_size; + // following capacity check will send in case stream is full + } + Err(e) => { + return Err(e) + .explain_err(WriteError, |_| "writing h3 request body to downstream") + } + } + } + debug_assert_eq!(fin, end); + debug_assert_eq!(sent_len, data.len()); + + if end { + self.tx_notify.notify_waiters(); + } + + Ok(sent_len) + } + + fn send_body_conn(&self, stream_id: u64, body: &[u8], fin: bool) -> Result { + let mut qconn = self.quic.lock(); + let mut hconn = self.http3.lock(); + + hconn + .send_body(&mut qconn, stream_id, body, fin) + .explain_err(WriteError, |e| { + format!("failed to send http3 request body {:?}", e) + }) + } + + fn finish_send(&self, stream_id: u64) -> Result<()> { + // use an empty data frame to signal the end + self.send_body_conn(stream_id, &[], true).explain_err( + WriteError, + |e| format! {"Writing h3 request body finished to downstream failed. 
{e}"}, + )?; + self.tx_notify.notify_waiters(); + trace!("sent FIN flag for stream id {}", stream_id); + Ok(()) + } + + fn read_body(&self, stream_id: u64) -> Result<(Bytes, bool)> { + let mut buf = [0u8; MAX_IPV6_QUIC_DATAGRAM_SIZE]; + let mut data = BytesMut::new(); + + let continue_read = loop { + match self.read_body_conn(stream_id, &mut buf) { + Ok(read) => { + data.put_slice(&buf[..read]); + // limit in memory buffer growth + if data.len() + buf.len() > MAX_PER_INVOCATION_READ_BODY_BYTES { + // required to decide if subsequent calls should wait for new poll events + break true; + } + } + Err(quiche::h3::Error::Done) => { + // poll for next Http3 event + // Event::Finished is only emitted after recv_body is Done + self.rx_notify.notify_waiters(); + trace!("read_body done"); + break false; + } + Err(e) => { + return Err(Error::explain( + ReadError, + format!("reading body failed with {}", e), + )) + } + }; + }; + + Ok((data.into(), continue_read)) + } + + fn read_body_conn(&self, stream_id: u64, out: &mut [u8]) -> quiche::h3::Result { + let mut qconn = self.quic.lock(); + let mut hconn = self.http3.lock(); + debug!( + "H3 connection {:?} stream {} receiving body", + self.conn_id, stream_id + ); + hconn.recv_body(&mut qconn, stream_id, out) + } + + fn shutdown_stream(&self, stream_id: u64, read_ended: &mut bool, write_ended: &mut bool) { + let mut qconn = self.quic.lock(); + if !*read_ended { + // sent STOP_SENDING frame & stream_recv() will no longer return data + match qconn.stream_shutdown(stream_id, Shutdown::Read, 2u64) { + Ok(()) => {} + Err(e) => warn!("h3 stream {} shutdown failed. {:?}", stream_id, e), + } + *read_ended = true; + } + if !*write_ended { + // sent RESET_STREAM & stream_send() data will be ignored + match qconn.stream_shutdown(stream_id, Shutdown::Write, 2u64) { + Ok(()) => {} + Err(e) => warn!("h3 stream {} shutdown failed. 
{:?}", stream_id, e), + } + *write_ended = true; + } + + self.tx_notify.notify_waiters() + } +} + fn event_to_request_headers(list: &Vec
) -> Result { let (mut parts, _) = Request::new(()).into_parts(); let mut uri = Uri::builder(); @@ -83,7 +262,10 @@ fn response_headers_to_event(resp: &ResponseHeader) -> Vec
{ fn request_headers_to_event(req: &RequestHeader) -> Result> { let mut qheaders: Vec
= Vec::with_capacity(req.headers.len() + 4); // only encrypted traffic supported in HTTP3 - qheaders.push(Header::new(b":scheme".as_slice(), Scheme::HTTPS.to_string().as_bytes())); + qheaders.push(Header::new( + b":scheme".as_slice(), + Scheme::HTTPS.to_string().as_bytes(), + )); // use authority when present let authority = if let Some(authority) = req.uri.authority() { @@ -95,16 +277,23 @@ fn request_headers_to_event(req: &RequestHeader) -> Result> { return Error::e_explain(InvalidHTTPHeader, "no authority header for h3"); }; // validate - Authority::try_from(host.as_bytes()) - .explain_err(InvalidHTTPHeader, |_| format!("invalid authority from host {:?}", host))? + Authority::try_from(host.as_bytes()).explain_err(InvalidHTTPHeader, |_| { + format!("invalid authority from host {:?}", host) + })? }; - qheaders.push(Header::new(b":authority".as_slice(), authority.as_str().as_bytes())); + qheaders.push(Header::new( + b":authority".as_slice(), + authority.as_str().as_bytes(), + )); let Some(path) = req.uri.path_and_query() else { return Error::e_explain(InvalidHTTPHeader, "no path header for h3"); }; qheaders.push(Header::new(b":path".as_slice(), path.as_str().as_bytes())); - qheaders.push(Header::new(b":method".as_slice(), req.method.as_str().as_bytes())); + qheaders.push(Header::new( + b":method".as_slice(), + req.method.as_str().as_bytes(), + )); // copy all other request headers // the pseudo-headers starting with ":" need to be sent before regular headers @@ -121,12 +310,12 @@ fn event_to_response_headers(resp: &Vec
) -> Result { response.set_version(Version::HTTP_3); for h in &resp[1..] { - let k = HeaderName::from_bytes(h.name()) - .explain_err(InvalidHTTPHeader, - |_| format!("failed to parse header name {:?}", h.name()))?; - let v = HeaderValue::from_bytes(h.value()) - .explain_err(InvalidHTTPHeader, - |_| format!("failed to parse header value {:?}", h.value()))?; + let k = HeaderName::from_bytes(h.name()).explain_err(InvalidHTTPHeader, |_| { + format!("failed to parse header name {:?}", h.name()) + })?; + let v = HeaderValue::from_bytes(h.value()).explain_err(InvalidHTTPHeader, |_| { + format!("failed to parse header value {:?}", h.value()) + })?; response.append_header(k, v)?; } @@ -144,12 +333,12 @@ fn headervec_to_headermap(headers: &Vec
) -> Result { let mut map = HeaderMap::with_capacity(headers.len()); for h in headers { if h.name().len() > 0 && h.name()[0] == b":".as_slice()[0] { - let k = HeaderName::from_bytes(h.name()) - .explain_err(InvalidHTTPHeader, - |_| format!("failed to parse header name {:?}", h.name()))?; - let v = HeaderValue::from_bytes(h.value()) - .explain_err(InvalidHTTPHeader, - |_| format!("failed to parse header value {:?}", h.value()))?; + let k = HeaderName::from_bytes(h.name()).explain_err(InvalidHTTPHeader, |_| { + format!("failed to parse header name {:?}", h.name()) + })?; + let v = HeaderValue::from_bytes(h.value()).explain_err(InvalidHTTPHeader, |_| { + format!("failed to parse header value {:?}", h.value()) + })?; map.insert(k, v); } } @@ -162,53 +351,21 @@ fn header_size(headers: &[T]) -> usize { .fold(0, |acc, h| acc + h.value().len() + h.name().len() + 32) } -fn stream_capacity<'a>( - conn: &'a Mutex, - stream_id: u64, - required: usize, - rx_notify: &'a Notify, - tx_notify: &'a Notify -) -> Pin> + Send + 'a>> { - Box::pin(async move { - let capacity; - { - let qconn = conn.lock(); - let conn_id = qconn.trace_id(); - capacity = qconn.stream_capacity(stream_id) - .explain_err(ErrorType::WriteError, |e| { - format!( - "H3 connection {} failed to acquire capacity for stream {} error {:?}", - conn_id, stream_id, e - ) - })?; - } - - // FIXME: handle capacity <= required e.g. required is gt configured send buffers - if capacity >= required { - Ok(capacity) - } else { - tx_notify.notify_waiters(); - rx_notify.notified().await; - stream_capacity(conn, stream_id, required, rx_notify, tx_notify).await - } - }) -} - -async fn data_finished_event(stream_id: u64, event_rx: &mut Receiver) -> Result<()> { +async fn data_finished_event(stream_id: u64, event_rx: &mut Receiver) -> Result { loop { match event_rx.recv().await { Some(ev) => { match ev { Event::Finished => { trace!("stream {} event {:?}", stream_id, ev); - return Ok(()); + return Ok(true); } Event::Headers { .. 
} => { debug_assert!(false, "Headers or Finished event when Data requested"); } Event::Data => { trace!("stream {} event {:?}", stream_id, ev); - return Ok(()); + return Ok(false); } Event::Reset(error_code) => { return Err(Error::explain( @@ -246,4 +403,4 @@ async fn data_finished_event(stream_id: u64, event_rx: &mut Receiver) -> } } } -} \ No newline at end of file +} diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index df50dcd64..a5dc0ad8a 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -14,31 +14,33 @@ //! HTTP/3 server session +use crate::protocols::http::body_buffer::FixedBuffer; use crate::protocols::http::date::get_cached_date; use crate::protocols::http::v1::client::http_req_header_to_wire; +use crate::protocols::http::v3::nohash::StreamIdHashMap; +use crate::protocols::http::v3::{ + data_finished_event, event_to_request_headers, header_size, headermap_to_headervec, + response_headers_to_event, ConnectionIo, H3_SESSION_DROP_DEQUE_INITIAL_CAPACITY, + H3_SESSION_EVENTS_CHANNEL_SIZE, +}; +use crate::protocols::http::HttpTask; +use crate::protocols::l4::quic::Connection; use crate::protocols::{Digest, SocketAddr, Stream}; -use bytes::{BufMut, Bytes, BytesMut}; +use bytes::Bytes; use http::uri::PathAndQuery; use http::{header, HeaderMap, HeaderName}; use log::{debug, error, info, trace, warn}; use parking_lot::Mutex; -use pingora_error::{Error, OrErr, Result}; use pingora_error::ErrorType::{ConnectError, H3Error, InternalError, ReadError, WriteError}; +use pingora_error::{Error, OrErr, Result}; use pingora_http::{RequestHeader, ResponseHeader}; -use std::cmp; +pub use quiche::h3::Config as H3Options; +use quiche::h3::{Connection as QuicheH3Connection, Event, NameValue}; +use quiche::{h3, Connection as QuicheConnection, ConnectionId}; use std::collections::VecDeque; use std::fmt::Debug; use std::sync::Arc; use std::time::Duration; - -use 
crate::protocols::http::body_buffer::FixedBuffer; -use crate::protocols::http::v3::nohash::StreamIdHashMap; -use crate::protocols::http::v3::{data_finished_event, event_to_request_headers, header_size, headermap_to_headervec, response_headers_to_event, stream_capacity, H3_SESSION_DROP_DEQUE_INITIAL_CAPACITY, H3_SESSION_EVENTS_CHANNEL_SIZE}; -use crate::protocols::http::HttpTask; -use crate::protocols::l4::quic::{Connection, MAX_IPV6_QUIC_DATAGRAM_SIZE}; -pub use quiche::h3::Config as H3Options; -use quiche::h3::{Connection as QuicheH3Connection, Event, NameValue}; -use quiche::{h3, Connection as QuicheConnection, ConnectionId, Shutdown}; use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::{mpsc, Notify}; @@ -68,9 +70,7 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

, pub(crate) stream_id: u64, + + conn_io: ConnectionIo, + + // TODO: consolidate in ConnecitonIo quic_connection: Arc>, h3_connection: Arc>, - // notify during drop to remove event_tx from active sessions drop_session: Arc>>, - // trigger Quic send, continue ConnectionTx write loop tx_notify: Arc, - // receive notification on Quic recv, used to check stream capacity - // as it only increases after MaxData or MaxStreamData frame was received - rx_notify: Arc, // HTTP3 event channel for this stream_id event_rx: Receiver, @@ -233,6 +232,9 @@ pub struct H3Session { request_header: RequestHeader, // required as separate field for has_body request_has_body: bool, + // continue reading without waiting for new event + read_continue: bool, + // reading body is finished (Quic stream FIN flag received) read_ended: bool, body_read: usize, // buffered request body for retry logic @@ -244,8 +246,7 @@ pub struct H3Session { // How many (application, not wire) response body bytes have been sent so far. 
body_sent: usize, - // track if the FIN STREAM frame was already sent - // quiche::Connection::stream_send fin argument + // sending body is finished (Quic stream FIN flag sent) send_ended: bool, // digest to record underlying connection info @@ -303,12 +304,9 @@ impl H3Session { "H3 connection {:?} stream {} forward event={:?}", conn.connection_id, stream_id, ev ); - channel - .send(ev) - .await - .explain_err(WriteError, |e| { - format!("failed to send on event channel with {}", e) - })?; + channel.send(ev).await.explain_err(WriteError, |e| { + format!("failed to send on event channel with {}", e) + })?; } else { debug!( "H3 connection {:?} stream {} received event {:?}", @@ -329,9 +327,7 @@ impl H3Session { let mut hconn = conn.h3_connection.lock(); hconn .send_goaway(&mut qconn, conn.max_accepted_stream_id) - .explain_err(InternalError, |_| { - "failed to send goaway" - })?; + .explain_err(InternalError, |_| "failed to send goaway")?; conn.tx_notify.notify_waiters(); } Event::Headers { list, more_frames } => { @@ -346,20 +342,27 @@ impl H3Session { mpsc::channel(H3_SESSION_EVENTS_CHANNEL_SIZE); let session = H3Session { - connection_id: conn.connection_id.clone(), stream_id, - + conn_io: ConnectionIo { + conn_id: conn.connection_id.clone(), + quic: conn.quic_connection.clone(), + http3: conn.h3_connection.clone(), + rx_notify: conn.rx_notify.clone(), + tx_notify: conn.tx_notify.clone(), + }, + + // TODO: consolidate in ConnectionIo + connection_id: conn.connection_id.clone(), quic_connection: conn.quic_connection.clone(), h3_connection: conn.h3_connection.clone(), - drop_session: conn.drop_sessions.clone(), - tx_notify: conn.tx_notify.clone(), - rx_notify: conn.rx_notify.clone(), + event_rx, request_header: event_to_request_headers(&list)?, request_has_body: more_frames, + read_continue: false, read_ended: !more_frames, body_read: 0, body_retry_buffer: None, @@ -466,9 +469,8 @@ impl H3Session { conn.tx_notify.notify_waiters(); error!("H3 connection closed with 
error {:?}.", e); - return Err(e).explain_err(H3Error, |_| { - "while accepting new downstream requests" - }); + return Err(e) + .explain_err(H3Error, |_| "while accepting new downstream requests"); } } } @@ -490,47 +492,27 @@ impl H3Session { return Ok(None); } - data_finished_event(self.stream_id, &mut self.event_rx).await?; - self.read_ended = true; - - let mut buf = [0u8; MAX_IPV6_QUIC_DATAGRAM_SIZE]; - let size = match self.recv_body(&mut buf) { - Ok(size) => size, - Err(h3::Error::Done) => { - trace!("recv_body done"); - return Ok(Some(BytesMut::with_capacity(0).into())); + if !self.read_continue { + let finished = data_finished_event(self.stream_id, &mut self.event_rx).await?; + if finished { + trace!("finished event received"); + self.read_ended = true; + return Ok(None); } - Err(e) => { - return Err(Error::explain( - ReadError, - format!("reading body failed with {}", e), - )) - } - }; + } - let mut data = BytesMut::with_capacity(size); - data.put_slice(&buf[..size]); - let data: Bytes = data.into(); + let (data, continue_read) = self.conn_io.read_body(self.stream_id)?; + self.body_read += data.len(); + self.read_continue = continue_read; - self.body_read += size; if let Some(buffer) = &mut self.body_retry_buffer { buffer.write_to_buffer(&data); } - trace!("ready body len={:?}", data.len()); + trace!("read request body len={:?}", data.len()); Ok(Some(data)) } - fn recv_body(&self, out: &mut [u8]) -> h3::Result { - let mut qconn = self.quic_connection.lock(); - let mut hconn = self.h3_connection.lock(); - debug!( - "H3 connection {:?} stream {} receiving body", - self.connection_id, self.stream_id - ); - hconn.recv_body(&mut qconn, self.stream_id, out) - } - // the write_* don't have timeouts because the actual writing happens on the connection // not here. 
@@ -581,8 +563,9 @@ impl H3Session { } async fn send_response(&self, headers: &[T], fin: bool) -> Result<()> { - stream_capacity(&self.quic_connection, self.stream_id, header_size(headers), - &self.rx_notify, &self.tx_notify).await?; + self.conn_io + .stream_capacity(self.stream_id, header_size(headers)) + .await?; let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); @@ -595,9 +578,7 @@ impl H3Session { match hconn.send_response(&mut qconn, self.stream_id, headers, fin) { Ok(()) => Ok(()), Err(h3::Error::Done) => Ok(()), - Err(e) => Err(e).explain_err(WriteError, |_| { - "H3 connection failed to write response" - }), + Err(e) => Err(e).explain_err(WriteError, |_| "H3 connection failed to write response"), } } @@ -614,65 +595,23 @@ impl H3Session { )); }; - let mut sent_len = 0; - let mut fin = end; - while sent_len < data.len() { - let required = cmp::min(data.len() - sent_len, MAX_IPV6_QUIC_DATAGRAM_SIZE); - let capacity = stream_capacity(&self.quic_connection, self.stream_id, required, - &self.rx_notify, &self.tx_notify).await?; - - let send = if capacity > data.len() - sent_len { - &data[sent_len..data.len()] - } else { - &data[sent_len..sent_len + capacity] - }; - - fin = sent_len + send.len() == data.len() && end; - match self.send_body(send, fin) { - Ok(sent_size) => { - debug_assert_eq!(sent_size, send.len()); - sent_len += sent_size; - } - Err(e) => { - return Err(e).explain_err(WriteError, |_| { - "writing h3 response body to downstream" - }) - } - } - } - debug_assert_eq!(fin, end); - debug_assert_eq!(sent_len, data.len()); - if end { - self.tx_notify.notify_waiters(); - } + let sent_len = self.conn_io.send_body(self.stream_id, &data, end).await?; self.body_sent += sent_len; self.send_ended = self.send_ended || end; Ok(()) } - fn send_body(&self, body: &[u8], fin: bool) -> h3::Result { - let mut qconn = self.quic_connection.lock(); - let mut hconn = self.h3_connection.lock(); - - debug!( - "H3 connection {:?} stream {} 
sending response body with length={:?}, finished={}", - self.connection_id, - self.stream_id, - body.len(), - fin - ); - - hconn.send_body(&mut qconn, self.stream_id, body, fin) - } - /// Write response trailers to the client, this also closes the stream. pub async fn write_trailers(&mut self, trailers: HeaderMap) -> Result<()> { if self.send_ended { warn!("Tried to write trailers after end of stream, dropping them"); return Ok(()); } else if self.body_sent == 0 { - return Err(Error::explain(H3Error,"Trying to send trailers before body is sent.")); + return Err(Error::explain( + H3Error, + "Trying to send trailers before body is sent.", + )); }; let headers = headermap_to_headervec(&trailers); @@ -692,8 +631,9 @@ impl H3Session { is_trailer: bool, fin: bool, ) -> Result<()> { - stream_capacity(&self.quic_connection, self.stream_id, header_size(headers), - &self.rx_notify, &self.tx_notify).await?; + self.conn_io + .stream_capacity(self.stream_id, header_size(headers)) + .await?; let mut qconn = self.quic_connection.lock(); let mut hconn = self.h3_connection.lock(); @@ -740,12 +680,7 @@ impl H3Session { } if self.response_header_written.is_some() { - // use an empty data frame to signal the end - self.send_body(&[], true).explain_err( - WriteError, - |e| format! {"Writing h3 response body to downstream failed. {e}"}, - )?; - self.tx_notify.notify_waiters(); + self.conn_io.finish_send(self.stream_id)?; self.send_ended = true; } // else: the response header is not sent, do nothing now. @@ -753,8 +688,6 @@ impl H3Session { Ok(()) } - - async fn reset_event(&mut self) -> Result { loop { match self.event_rx.recv().await { @@ -845,24 +778,8 @@ impl H3Session { /// /// This will send a `STOP_SENDING` and a `RESET_STREAM` for the Quic stream to the client. 
pub fn shutdown(&mut self) { - if !self.read_ended { - self.stream_shutdown(Shutdown::Read, 2u64); - // sent STOP_SENDING frame & stream_recv() will no longer return data - self.read_ended = true; - } - if !self.send_ended { - self.stream_shutdown(Shutdown::Write, 2u64); - // sent RESET_STREAM & stream_send() data will be ignored - self.send_ended = true; - } - } - - fn stream_shutdown(&self, direction: Shutdown, error_code: u64) { - let mut qconn = self.quic_connection.lock(); - match qconn.stream_shutdown(self.stream_id, direction, error_code) { - Ok(()) => self.tx_notify.notify_waiters(), - Err(e) => warn!("h3 stream {} shutdown failed. {:?}", self.stream_id, e), - } + self.conn_io + .shutdown_stream(self.stream_id, &mut self.read_ended, &mut self.send_ended); } // This is a hack for pingora-proxy to create subrequests from h3 server session @@ -923,10 +840,7 @@ impl H3Session { pub async fn read_body_or_idle(&mut self, no_body_expected: bool) -> Result> { if no_body_expected || self.is_body_done() { let reason = self.reset_event().await?; - Error::e_explain( - H3Error, - format!("Client closed H3, reason: {reason}"), - ) + Error::e_explain(H3Error, format!("Client closed H3, reason: {reason}")) } else { self.read_body_bytes().await } diff --git a/pingora-core/src/protocols/l4/quic/connector.rs b/pingora-core/src/protocols/l4/quic/connector.rs index 3c51996f4..9b6efbb24 100644 --- a/pingora-core/src/protocols/l4/quic/connector.rs +++ b/pingora-core/src/protocols/l4/quic/connector.rs @@ -49,9 +49,7 @@ impl Connection { format!("failed to get peer address from socket: {}", e) })?; - let configs = configs.unwrap_or( - QuicHttp3Configs::from_ca_file_path(None)? 
- ); + let configs = configs.unwrap_or(QuicHttp3Configs::from_ca_file_path(None)?); let (gso_enabled, pacing_enabled) = detect_gso_pacing(&io); Ok(Self::OutgoingHandshake(OutgoingHandshakeState { diff --git a/pingora-core/src/protocols/l4/quic/listener.rs b/pingora-core/src/protocols/l4/quic/listener.rs index 3758ed7cd..30f77cbdd 100644 --- a/pingora-core/src/protocols/l4/quic/listener.rs +++ b/pingora-core/src/protocols/l4/quic/listener.rs @@ -4,7 +4,8 @@ use crate::protocols::l4::quic::{ detect_gso_pacing, Connection, Crypto, SocketDetails, CONNECTION_DROP_DEQUE_INITIAL_SIZE, HANDSHAKE_PACKET_BUFFER_SIZE, MAX_IPV6_BUF_SIZE, }; -use log::{debug, error, trace, warn}; +use crate::protocols::l4::stream::Stream; +use log::{debug, trace, warn}; use parking_lot::Mutex; use pingora_error::{BError, ErrorType, OrErr}; use quiche::{h3, Connection as QuicheConnection, ConnectionId, Header, RecvInfo, Type}; @@ -65,9 +66,9 @@ pub enum IncomingConnectionHandle { impl Debug for IncomingConnectionHandle { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.write_str("ConnectionHandle")?; + f.write_str("IncomingConnectionHandle")?; match self { - IncomingConnectionHandle::Handshake(_) => f.write_str("::Incoming"), + IncomingConnectionHandle::Handshake(_) => f.write_str("::Handshake"), IncomingConnectionHandle::Established(_) => f.write_str("::Established"), } } @@ -120,9 +121,7 @@ pub struct Listener { } impl Listener { - pub(crate) async fn accept( - &mut self, - ) -> io::Result<(crate::protocols::l4::stream::Stream, SocketAddr)> { + pub(crate) async fn accept(&mut self) -> io::Result<(Stream, SocketAddr)> { let mut rx_buf = [0u8; MAX_IPV6_BUF_SIZE]; debug!("endpoint rx loop"); @@ -331,7 +330,7 @@ impl Listener { Ok(len) } Err(e) => { - error!("connection {:?} receive error {:?}", conn_id, e); + trace!("connection {:?} receive error {:?}", conn_id, e); Err(io::Error::new( ErrorKind::BrokenPipe, format!( diff --git a/pingora-core/src/protocols/l4/quic/mod.rs 
b/pingora-core/src/protocols/l4/quic/mod.rs index a12c54244..1a443df8c 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -2,8 +2,8 @@ use log::{debug, error, trace}; use parking_lot::Mutex; use pingora_error::{Error, ErrorType, OrErr, Result}; use quiche::Connection as QuicheConnection; +use quiche::ConnectionId; use quiche::{h3, Config}; -use quiche::{ConnectionId, Stats}; use ring::hmac::Key; use ring::rand::SystemRandom; @@ -126,12 +126,9 @@ impl ConnectionTx { 'write: loop { let mut continue_write = false; - // update stats from connection - let max_send_burst = { - let conn = self.connection.lock(); - self.tx_stats - .max_send_burst(conn.stats(), conn.send_quantum()) - }; + // update tx stats & get current details + let (max_dgram_size, max_send_burst) = self.tx_stats.max_send_burst(&self.connection); + let mut total_write = 0; let mut dst_info = None; @@ -196,7 +193,7 @@ impl ConnectionTx { &self.socket_details.io, &out[..total_write], &dst_info, - self.tx_stats.max_datagram_size, + max_dgram_size, self.socket_details.pacing_enabled, self.socket_details.gso_enabled, ) @@ -239,15 +236,27 @@ pub struct TxStats { } impl TxStats { - pub(crate) fn new(max_send_udp_payload_size: usize) -> Self { + pub(crate) fn new() -> Self { Self { loss_rate: 0.0, max_send_burst: MAX_IPV6_BUF_SIZE, - max_datagram_size: max_send_udp_payload_size, + max_datagram_size: 0, } } - fn max_send_burst(&mut self, stats: Stats, send_quantum: usize) -> usize { + pub fn max_send_burst(&mut self, connection: &Mutex) -> (usize, usize) { + let stats; + let send_quantum; + { + let conn = connection.lock(); + let max_udp_send_payload_size = conn.max_send_udp_payload_size(); + if self.max_datagram_size != max_udp_send_payload_size { + self.max_datagram_size = max_udp_send_payload_size + } + stats = conn.stats(); + send_quantum = conn.send_quantum(); + } + // Reduce max_send_burst by 25% if loss is increasing more than 0.1%. 
let loss_rate = stats.lost as f64 / stats.sent as f64; @@ -258,7 +267,10 @@ impl TxStats { self.loss_rate = loss_rate; } - send_quantum.min(self.max_send_burst) / self.max_datagram_size * self.max_datagram_size + let max_send_burst = + send_quantum.min(self.max_send_burst) / self.max_datagram_size * self.max_datagram_size; + + (self.max_datagram_size, max_send_burst) } } @@ -292,7 +304,7 @@ impl QuicHttp3Configs { quic.grease(false); // default true - quic.set_max_idle_timeout(600 * 1000); // default ulimited + quic.set_max_idle_timeout(60 * 1000); // default ulimited quic.set_max_recv_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // recv default is 65527 quic.set_max_send_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // send default is 1200 quic.set_initial_max_data(10_000_000); // 10 Mb @@ -330,8 +342,8 @@ impl QuicHttp3Configs { // quic.verify_peer(); default server = false; client = true // quic.discover_pmtu(false); // default false quic.grease(false); // default true - // quic.log_keys() && config.set_keylog(); // logging SSL secrets - // quic.set_ticket_key() // session ticket signer key material + // quic.log_keys() && config.set_keylog(); // logging SSL secrets + // quic.set_ticket_key() // session ticket signer key material //config.enable_early_data(); // can lead to ZeroRTT headers during handshake @@ -343,7 +355,7 @@ impl QuicHttp3Configs { // quic.set_application_protos_wire_format(); // quic.set_max_amplification_factor(3); // anti-amplification limit factor; default 3 - quic.set_max_idle_timeout(600 * 1000); // default ulimited + quic.set_max_idle_timeout(60 * 1000); // default ulimited quic.set_max_recv_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // recv default is 65527 quic.set_max_send_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // send default is 1200 quic.set_initial_max_data(10_000_000); // 10 Mb diff --git a/pingora-core/src/protocols/l4/stream.rs b/pingora-core/src/protocols/l4/stream.rs index 58bc17cdb..4c4d14a88 100644 --- 
a/pingora-core/src/protocols/l4/stream.rs +++ b/pingora-core/src/protocols/l4/stream.rs @@ -836,8 +836,8 @@ pub mod async_write_vec { } } -pub use async_write_vec::AsyncWriteVec; use crate::listeners::ALPN; +pub use async_write_vec::AsyncWriteVec; #[derive(Debug)] struct AccumulatedDuration { diff --git a/pingora-core/src/protocols/tls/quic/client.rs b/pingora-core/src/protocols/tls/quic/client.rs index 98a83744d..da5af29d7 100644 --- a/pingora-core/src/protocols/tls/quic/client.rs +++ b/pingora-core/src/protocols/tls/quic/client.rs @@ -101,9 +101,7 @@ where conn_id, local_addr, peer_addr ); - let max_udp_payload_size = conn.max_send_udp_payload_size(); let connection = Arc::new(Mutex::new(conn)); - let tx_notify = Arc::new(Notify::new()); let rx_notify = Arc::new(Notify::new()); @@ -113,7 +111,7 @@ where connection_id: conn_id.clone(), connection: connection.clone(), tx_notify: tx_notify.clone(), - tx_stats: TxStats::new(max_udp_payload_size), + tx_stats: TxStats::new(), }; let rx = ConnectionRx { socket_details: socket_details.clone(), @@ -145,7 +143,7 @@ where handle_connection_errors(conn_id.clone(), conn.peer_error(), conn.local_error())?; if conn.is_established() { - // send HANDSHAKE_DONE Quic frame on established connection + // send response packets tx_notify.notify_waiters(); break; } @@ -154,7 +152,6 @@ where tx_notify.notify_waiters(); } - let e_state = OutgoingEstablishedState { connection_id: conn_id.clone(), connection: connection.clone(), diff --git a/pingora-core/src/protocols/tls/quic/server.rs b/pingora-core/src/protocols/tls/quic/server.rs index bd565c9bf..237c38dbb 100644 --- a/pingora-core/src/protocols/tls/quic/server.rs +++ b/pingora-core/src/protocols/tls/quic/server.rs @@ -277,7 +277,6 @@ async fn handshake_incoming( handle_connection_errors(conn_id.clone(), conn.peer_error(), conn.local_error())?; } - let max_send_udp_payload_size = conn.max_send_udp_payload_size(); let connection_id = conn_id; let connection = Arc::new(Mutex::new(conn)); 
let tx_notify = Arc::new(Notify::new()); @@ -306,7 +305,7 @@ async fn handshake_incoming( connection: connection.clone(), tx_notify: tx_notify.clone(), - tx_stats: TxStats::new(max_send_udp_payload_size), + tx_stats: TxStats::new(), }; let e_state = IncomingEstablishedState { diff --git a/pingora-core/src/upstreams/peer.rs b/pingora-core/src/upstreams/peer.rs index 2e9790faf..e2aef2fc6 100644 --- a/pingora-core/src/upstreams/peer.rs +++ b/pingora-core/src/upstreams/peer.rs @@ -32,11 +32,11 @@ use std::sync::Arc; use std::time::Duration; use crate::connectors::{l4::BindTo, L4Connect}; +use crate::protocols::l4::quic::QuicHttp3Configs; use crate::protocols::l4::socket::SocketAddr; use crate::protocols::tls::CaType; #[cfg(unix)] use crate::protocols::ConnFdReusable; -use crate::protocols::l4::quic::QuicHttp3Configs; use crate::protocols::TcpKeepalive; use crate::utils::tls::{get_organization_unit, CertKey}; From a39ef3fd73a125f34405a798a4fd60352b26b641 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Fri, 24 Jan 2025 16:30:50 +0100 Subject: [PATCH 35/52] unify ConnectionIo for server & client --- pingora-core/src/connectors/http/v3.rs | 9 +- pingora-core/src/protocols/http/v3/client.rs | 8 +- pingora-core/src/protocols/http/v3/mod.rs | 15 +- pingora-core/src/protocols/http/v3/server.rs | 194 +++++++++---------- 4 files changed, 105 insertions(+), 121 deletions(-) diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index 21f566c5d..69e88ac8c 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -55,7 +55,7 @@ impl ConnectionRef { impl ConnectionRef { pub(crate) fn conn_id(&self) -> &ConnectionId<'_> { - &self.0.conn_io.conn_id + &self.0.conn_io.id } pub(crate) fn conn_io(&self) -> &ConnectionIo { @@ -131,10 +131,7 @@ impl Drop for ConnectionRefInner { fn drop(&mut self) { if !self.h3poll_task.is_finished() { self.h3poll_task.abort(); - debug!( - "connection {:?} stopped Http3Poll 
task", - self.conn_io.conn_id - ) + debug!("connection {:?} stopped Http3Poll task", self.conn_io.id) } } } @@ -355,7 +352,7 @@ async fn handshake(mut stream: Stream, max_streams: usize) -> Result Result<()> { - let conn_id = self.conn_io.conn_id.clone(); + let conn_id = self.conn_io.id.clone(); 'poll: loop { let res = { let mut qconn = self.conn_io.quic.lock(); @@ -494,7 +494,7 @@ impl Http3Poll { } else { debug!( "connection {:?} added stream id {} to sessions", - self.conn_io.conn_id, stream_id + self.conn_io.id, stream_id ) } } @@ -507,7 +507,7 @@ impl Http3Poll { if let Some(_sender) = self.sessions.remove(&stream_id) { debug!( "connection {:?} removed stream id {} from sessions", - self.conn_io.conn_id, stream_id + self.conn_io.id, stream_id ) } else { return Err(Error::explain( diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index 199cf623e..cc40eafbc 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -44,7 +44,7 @@ pub mod server; #[derive(Clone)] pub(crate) struct ConnectionIo { - pub(crate) conn_id: ConnectionId<'static>, + pub(crate) id: ConnectionId<'static>, pub(crate) quic: Arc>, pub(crate) http3: Arc>, @@ -52,7 +52,7 @@ pub(crate) struct ConnectionIo { // receive notification on Quic recv, used to check stream capacity // as it only increases after MaxData or MaxStreamData frame was received pub(crate) rx_notify: Arc, - + // trigger Quic send, continue ConnectionTx write loop pub(crate) tx_notify: Arc, } @@ -62,7 +62,7 @@ impl ConnectionIo { qconn.is_draining() } - fn stream_capacity( + fn capacity( &self, stream_id: u64, required: usize, @@ -87,7 +87,7 @@ impl ConnectionIo { } else { self.tx_notify.notify_waiters(); self.rx_notify.notified().await; - self.stream_capacity(stream_id, required).await + self.capacity(stream_id, required).await } }) } @@ -97,7 +97,7 @@ impl ConnectionIo { let mut fin = end; while sent_len < data.len() { let required 
= cmp::min(data.len() - sent_len, MAX_IPV6_QUIC_DATAGRAM_SIZE); - let capacity = self.stream_capacity(stream_id, required).await?; + let capacity = self.capacity(stream_id, required).await?; let send = if capacity > data.len() - sent_len { &data[sent_len..data.len()] @@ -185,14 +185,15 @@ impl ConnectionIo { fn read_body_conn(&self, stream_id: u64, out: &mut [u8]) -> quiche::h3::Result { let mut qconn = self.quic.lock(); let mut hconn = self.http3.lock(); + debug!( "H3 connection {:?} stream {} receiving body", - self.conn_id, stream_id + self.id, stream_id ); hconn.recv_body(&mut qconn, stream_id, out) } - fn shutdown_stream(&self, stream_id: u64, read_ended: &mut bool, write_ended: &mut bool) { + fn shutdown(&self, stream_id: u64, read_ended: &mut bool, write_ended: &mut bool) { let mut qconn = self.quic.lock(); if !*read_ended { // sent STOP_SENDING frame & stream_recv() will no longer return data diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index a5dc0ad8a..273ee4e7d 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -35,14 +35,13 @@ use pingora_error::ErrorType::{ConnectError, H3Error, InternalError, ReadError, use pingora_error::{Error, OrErr, Result}; use pingora_http::{RequestHeader, ResponseHeader}; pub use quiche::h3::Config as H3Options; -use quiche::h3::{Connection as QuicheH3Connection, Event, NameValue}; -use quiche::{h3, Connection as QuicheConnection, ConnectionId}; +use quiche::h3::{self, Connection as QuicheH3Connection, Event, NameValue}; +use quiche::ConnectionId; use std::collections::VecDeque; use std::fmt::Debug; use std::sync::Arc; use std::time::Duration; -use tokio::sync::mpsc::{Receiver, Sender}; -use tokio::sync::{mpsc, Notify}; +use tokio::sync::mpsc::{self, Receiver, Sender}; const BODY_BUF_LIMIT: usize = 1024 * 64; const SHUTDOWN_GOAWAY_DRAIN_TIMEOUT: Duration = Duration::from_secs(60); @@ -59,29 +58,29 @@ pub async 
fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

{ + let (conn_io, drop_connections) = match conn { + Connection::IncomingEstablished(e_state) => { let hconn = { let http3_config = if let Some(h3_options) = options { h3_options } else { - &state.http3_config + &e_state.http3_config }; - let mut qconn = state.connection.lock(); + let mut qconn = e_state.connection.lock(); QuicheH3Connection::with_transport(&mut qconn, http3_config) .explain_err(ConnectError, |_| "failed to create H3 connection")? }; - state.tx_notify.notify_waiters(); - - ( - state.connection_id.clone(), - state.connection.clone(), - state.drop_connection.clone(), - hconn, - state.tx_notify.clone(), - state.rx_notify.clone(), - ) + e_state.tx_notify.notify_waiters(); + + let conn_io = ConnectionIo { + id: e_state.connection_id.clone(), + quic: e_state.connection.clone(), + http3: Arc::new(Mutex::new(hconn)), + rx_notify: e_state.rx_notify.clone(), + tx_notify: e_state.tx_notify.clone(), + }; + (conn_io, e_state.drop_connection.clone()) } _ => { return Err(Error::explain( @@ -93,14 +92,9 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

) -> Result

, - drop_quic_connection: Arc>>>, + _l4stream: Stream, // ensure the stream will not be dropped until connection is closed - quic_connection: Arc>, - h3_connection: Arc>, - - tx_notify: Arc, - rx_notify: Arc, + conn_io: ConnectionIo, + drop_connections: Arc>>>, sessions: StreamIdHashMap>, drop_sessions: Arc>>, @@ -133,29 +122,33 @@ pub struct H3Connection { impl Drop for H3Connection { fn drop(&mut self) { - let mut drop_quic_connection = self.drop_quic_connection.lock(); - drop_quic_connection.push_back(self.connection_id.clone()); - debug!("drop connection {:?}", self.connection_id); + let mut drop_connections = self.drop_connections.lock(); + drop_connections.push_back(self.conn_id().clone()); + debug!("drop connection {:?}", self.conn_id()); } } impl H3Connection { + fn conn_id(&self) -> &ConnectionId<'static> { + &self.conn_io.id + } + pub async fn graceful_shutdown(&mut self) -> Result<()> { // send GOAWAY frame { - let mut qconn = self.quic_connection.lock(); - let mut hconn = self.h3_connection.lock(); + let mut qconn = self.conn_io.quic.lock(); + let mut hconn = self.conn_io.http3.lock(); - debug!("H3 connection {:?} sending GoAway", self.connection_id); + debug!("H3 connection {:?} sending GoAway", self.conn_id()); hconn .send_goaway(&mut qconn, self.max_accepted_stream_id) .explain_err(H3Error, |_| "failed to send graceful shutdown")?; - self.tx_notify.notify_waiters(); + self.conn_io.tx_notify.notify_waiters(); } let drain = async { while !self.sessions.is_empty() { - self.rx_notify.notified().await + self.conn_io.rx_notify.notified().await } }; @@ -168,11 +161,11 @@ impl H3Connection { // close quic connection { - let mut qconn = self.quic_connection.lock(); + let mut qconn = self.conn_io.quic.lock(); qconn .close(false, 0x00, b"graceful shutdown") .explain_err(H3Error, |_| "failed to close quic connection")?; - self.tx_notify.notify_waiters(); + self.conn_io.tx_notify.notify_waiters(); } if is_timeout { @@ -194,13 +187,15 @@ impl H3Connection { None 
=> { warn!( "connection {:?} failed to remove stream {} from sessions", - self.connection_id, stream_id + self.conn_id(), + stream_id ) } Some(_) => { debug!( "connection {:?} stream {} removed from sessions", - self.connection_id, stream_id + self.conn_id(), + stream_id ); } }; @@ -213,18 +208,11 @@ impl H3Connection { /// [`pingora_http::RequestHeader`]. The [`H3Session`] is built around [`pingora_http`] structs and /// converts to [`quiche::h3::Event`] where needed. pub struct H3Session { - pub(crate) connection_id: ConnectionId<'static>, pub(crate) stream_id: u64, - conn_io: ConnectionIo, - // TODO: consolidate in ConnecitonIo - quic_connection: Arc>, - h3_connection: Arc>, // notify during drop to remove event_tx from active sessions drop_session: Arc>>, - // trigger Quic send, continue ConnectionTx write loop - tx_notify: Arc, // HTTP3 event channel for this stream_id event_rx: Receiver, @@ -259,7 +247,8 @@ impl Drop for H3Session { drop_sessions.push_back(self.stream_id); debug!( "H3 connection {:?} drop stream {}", - self.connection_id, self.stream_id + self.conn_id(), + self.stream_id ); } } @@ -284,8 +273,8 @@ impl H3Session { ) -> Result> { 'poll: loop { let poll = { - let mut qconn = conn.quic_connection.lock(); - let mut hconn = conn.h3_connection.lock(); + let mut qconn = conn.conn_io.quic.lock(); + let mut hconn = conn.conn_io.http3.lock(); // NOTE: poll() drives the entire Quic/HTTP3 connection hconn.poll(&mut qconn) }; @@ -302,7 +291,9 @@ impl H3Session { if let Some(channel) = conn.sessions.get(&stream_id) { debug!( "H3 connection {:?} stream {} forward event={:?}", - conn.connection_id, stream_id, ev + conn.conn_id(), + stream_id, + ev ); channel.send(ev).await.explain_err(WriteError, |e| { format!("failed to send on event channel with {}", e) @@ -310,7 +301,9 @@ impl H3Session { } else { debug!( "H3 connection {:?} stream {} received event {:?}", - conn.connection_id, stream_id, &ev + conn.conn_id(), + stream_id, + &ev ); match ev { Event::Data 
@@ -323,17 +316,17 @@ impl H3Session { info!("stream_id {} received GoAway", stream_id); conn.received_goaway = Some(stream_id); - let mut qconn = conn.quic_connection.lock(); - let mut hconn = conn.h3_connection.lock(); + let mut qconn = conn.conn_io.quic.lock(); + let mut hconn = conn.conn_io.http3.lock(); hconn .send_goaway(&mut qconn, conn.max_accepted_stream_id) .explain_err(InternalError, |_| "failed to send goaway")?; - conn.tx_notify.notify_waiters(); + conn.conn_io.tx_notify.notify_waiters(); } Event::Headers { list, more_frames } => { trace!( "H3 connection {:?} request headers={:?}, more_frames={:?}", - conn.connection_id, + conn.conn_id(), &list, &more_frames ); @@ -343,20 +336,9 @@ impl H3Session { let session = H3Session { stream_id, - conn_io: ConnectionIo { - conn_id: conn.connection_id.clone(), - quic: conn.quic_connection.clone(), - http3: conn.h3_connection.clone(), - rx_notify: conn.rx_notify.clone(), - tx_notify: conn.tx_notify.clone(), - }, - - // TODO: consolidate in ConnectionIo - connection_id: conn.connection_id.clone(), - quic_connection: conn.quic_connection.clone(), - h3_connection: conn.h3_connection.clone(), + conn_io: conn.conn_io.clone(), + drop_session: conn.drop_sessions.clone(), - tx_notify: conn.tx_notify.clone(), event_rx, @@ -379,7 +361,8 @@ impl H3Session { false, "H3 connection {:?} stream {} existing \ session is not allowed", - conn.connection_id, stream_id + conn.conn_id(), + stream_id ) }; @@ -390,7 +373,7 @@ impl H3Session { } } Err(h3::Error::Done) => { - debug!("H3 connection {:?} no events available", conn.connection_id); + debug!("H3 connection {:?} no events available", conn.conn_id()); // TODO: in case PriorityUpdate was triggered call take_priority_update() here conn.sessions_housekeeping().await; @@ -398,21 +381,21 @@ impl H3Session { let is_closed; let timeout; { - let qconn = conn.quic_connection.lock(); + let qconn = conn.conn_io.quic.lock(); is_closed = qconn.is_closed() || !(qconn.is_established() || 
qconn.is_in_early_data()); if is_closed { if let Some(e) = qconn.peer_error() { debug!( "connection {:?} peer error reason: {}", - conn.connection_id, + conn.conn_id(), String::from_utf8_lossy(e.reason.as_slice()).to_string() ); } if let Some(e) = qconn.local_error() { debug!( "connection {:?} local error reason: {}", - conn.connection_id, + conn.conn_id(), String::from_utf8_lossy(e.reason.as_slice()).to_string() ); } @@ -424,39 +407,39 @@ impl H3Session { if !conn.sessions.is_empty() { warn!( "H3 connection {:?} closed with open {} sessions", - conn.connection_id, + conn.conn_id(), conn.sessions.len() ); } else { - debug!("H3 connection {:?} closed", conn.connection_id); + debug!("H3 connection {:?} closed", conn.conn_id()); } - conn.tx_notify.notify_waiters(); + conn.conn_io.tx_notify.notify_waiters(); return Ok(None); } // race for new data on connection or timeout tokio::select! { - _data = conn.rx_notify.notified() => {} + _data = conn.conn_io.rx_notify.notified() => {} _timedout = async { if let Some(timeout) = timeout { - debug!("connection {:?} timeout {:?}", conn.connection_id, timeout); + debug!("connection {:?} timeout {:?}", conn.conn_id(), timeout); tokio::time::sleep(timeout).await } else { - debug!("connection {:?} timeout not present", conn.connection_id); + debug!("connection {:?} timeout not present", conn.conn_id()); tokio::time::sleep(Duration::MAX).await } } => { conn.sessions_housekeeping().await; if !conn.sessions.is_empty() { warn!("connection {:?} timed out with {} open sessions", - conn.connection_id, conn.sessions.len()); + conn.conn_id(), conn.sessions.len()); } - let mut qconn = conn.quic_connection.lock(); + let mut qconn = conn.conn_io.quic.lock(); // closes connection qconn.on_timeout(); if let Some(timeout) = timeout { - debug!("connection {:?} timed out {:?}", conn.connection_id, timeout); + debug!("connection {:?} timed out {:?}", conn.conn_id(), timeout); } } } @@ -466,7 +449,7 @@ impl H3Session { // the appropriate error code, 
using the transport’s close() method. // send the close() event - conn.tx_notify.notify_waiters(); + conn.conn_io.tx_notify.notify_waiters(); error!("H3 connection closed with error {:?}.", e); return Err(e) @@ -554,7 +537,7 @@ impl H3Session { let headers = response_headers_to_event(&header); self.send_response(headers.as_slice(), end).await?; if end { - self.tx_notify.notify_waiters(); + self.conn_io.tx_notify.notify_waiters(); } self.response_header_written = Some(header); @@ -564,15 +547,18 @@ impl H3Session { async fn send_response(&self, headers: &[T], fin: bool) -> Result<()> { self.conn_io - .stream_capacity(self.stream_id, header_size(headers)) + .capacity(self.stream_id, header_size(headers)) .await?; - let mut qconn = self.quic_connection.lock(); - let mut hconn = self.h3_connection.lock(); + let mut qconn = self.conn_io.quic.lock(); + let mut hconn = self.conn_io.http3.lock(); debug!( "H3 connection {:?} stream {} sending response headers={:?}, finished={}", - self.connection_id, self.stream_id, headers, fin + self.conn_id(), + self.stream_id, + headers, + fin ); match hconn.send_response(&mut qconn, self.stream_id, headers, fin) { @@ -632,24 +618,20 @@ impl H3Session { fin: bool, ) -> Result<()> { self.conn_io - .stream_capacity(self.stream_id, header_size(headers)) + .capacity(self.stream_id, header_size(headers)) .await?; - let mut qconn = self.quic_connection.lock(); - let mut hconn = self.h3_connection.lock(); + let mut qconn = self.conn_io.quic.lock(); + let mut hconn = self.conn_io.http3.lock(); debug!( "H3 connection {:?} stream {} sending additional headers={:?}, is_trailer={:?} finished={}", - self.connection_id, - self.stream_id, - headers, - is_trailer, - fin + self.conn_id(), self.stream_id, headers, is_trailer, fin ); match hconn.send_additional_headers(&mut qconn, stream_id, headers, is_trailer, fin) { Ok(()) => { - self.tx_notify.notify_waiters(); + self.conn_io.tx_notify.notify_waiters(); Ok(()) } Err(e) => 
Err(e).explain_err(WriteError, |_| { @@ -779,7 +761,7 @@ impl H3Session { /// This will send a `STOP_SENDING` and a `RESET_STREAM` for the Quic stream to the client. pub fn shutdown(&mut self) { self.conn_io - .shutdown_stream(self.stream_id, &mut self.read_ended, &mut self.send_ended); + .shutdown(self.stream_id, &mut self.read_ended, &mut self.send_ended); } // This is a hack for pingora-proxy to create subrequests from h3 server session @@ -875,4 +857,8 @@ impl H3Session { pub fn client_addr(&self) -> Option<&SocketAddr> { self.digest.socket_digest.as_ref().map(|d| d.peer_addr())? } + + fn conn_id(&self) -> &ConnectionId<'_> { + &self.conn_io.id + } } From 26cadf4a02ccab19c79b9bfb766fc86da0a001ab Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Fri, 24 Jan 2025 18:24:17 +0100 Subject: [PATCH 36/52] add connection timeout for Http3Poll task --- pingora-core/src/protocols/http/v3/client.rs | 158 +++++++++++++----- pingora-core/src/protocols/http/v3/server.rs | 63 ++++--- .../src/protocols/l4/quic/connector.rs | 17 +- pingora-core/src/protocols/l4/quic/mod.rs | 27 +++ pingora-core/src/protocols/tls/quic/client.rs | 5 +- pingora-core/src/protocols/tls/quic/mod.rs | 31 ---- pingora-core/src/protocols/tls/quic/server.rs | 16 +- pingora-core/tests/test_basic.rs | 1 - 8 files changed, 193 insertions(+), 125 deletions(-) diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs index dcf6c51d7..d4f22165d 100644 --- a/pingora-core/src/protocols/http/v3/client.rs +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -4,17 +4,18 @@ use crate::protocols::http::v3::{ data_finished_event, event_to_response_headers, headervec_to_headermap, request_headers_to_event, ConnectionIo, H3_SESSION_EVENTS_CHANNEL_SIZE, }; +use crate::protocols::l4::quic::handle_connection_errors; use crate::protocols::l4::socket::SocketAddr; use crate::protocols::{Digest, UniqueID, UniqueIDType}; use bytes::Bytes; use http::HeaderMap; -use log::{debug, 
trace, warn}; +use log::{debug, error, trace, warn}; use parking_lot::Mutex; use pingora_error::ErrorType::{H3Error, InternalError, InvalidHTTPHeader, ReadError, WriteError}; use pingora_error::{Error, ErrorType, OrErr, Result}; use pingora_http::{RequestHeader, ResponseHeader}; -use quiche::h3; -use quiche::h3::{Event, Header, NameValue}; +use quiche::h3::{self, Event, Header, NameValue}; +use quiche::ConnectionId; use std::collections::VecDeque; use std::fmt::Debug; use std::sync::Arc; @@ -439,28 +440,88 @@ impl Http3Poll { hconn.poll(&mut qconn) }; + // TODO: unify with server from_h3_conn let (stream_id, ev) = match res { Ok((stream, ev)) => (stream, ev), - Err(e) => match e { - h3::Error::Done => { - self.sessions_housekeeping()?; - - // TODO: connection timeout racing - self.conn_io.rx_notify.notified().await; - continue 'poll; + Err(h3::Error::Done) => { + debug!("H3 connection {:?} no events available", self.conn_id()); + + self.sessions_housekeeping(); + + let timeout; + { + let qconn = self.conn_io.quic.lock(); + let is_closed = qconn.is_closed() + || !(qconn.is_established() || qconn.is_in_early_data()); + if is_closed { + if !self.sessions.is_empty() { + warn!( + "H3 connection {:?} closed with open {} sessions", + self.conn_id(), + self.sessions.len() + ); + } else { + debug!("H3 connection {:?} closed", self.conn_id()); + } + + // send close in case it is a local error + self.conn_io.tx_notify.notify_waiters(); + + return handle_connection_errors( + self.conn_id(), + qconn.local_error(), + qconn.peer_error(), + ); + } + timeout = qconn.timeout(); } - _ => { - break 'poll Err(e).explain_err(H3Error, |_| { - format!("failed to poll h3 connection {:?}", e) - }) + + tokio::select! 
{ + _data = self.conn_io.rx_notify.notified() => { /* continue */ } + _timedout = async { + if let Some(timeout) = timeout { + debug!("connection {:?} timeout {:?}", self.conn_id(), timeout); + tokio::time::sleep(timeout).await + } else { + debug!("connection {:?} timeout not present", self.conn_id()); + tokio::time::sleep(Duration::MAX).await + } + } => { + self.sessions_housekeeping(); + if !self.sessions.is_empty() { + warn!("connection {:?} timed out with {} open sessions", + self.conn_id(), self.sessions.len()); + } + let mut qconn = self.conn_io.quic.lock(); + // closes connection + qconn.on_timeout(); + if let Some(timeout) = timeout { + debug!("connection {:?} timed out {:?}", self.conn_id(), timeout); + } + } } - }, + continue 'poll; + } + Err(e) => { + // If an error occurs while processing data, the connection is closed with + // the appropriate error code, using the transport’s close() method. + + // send the close() event + self.conn_io.tx_notify.notify_waiters(); + + error!( + "H3 connection {:?} closed with error {:?}.", + self.conn_io.id, e + ); + return Err(e) + .explain_err(H3Error, |_| "failed to poll H3 connection for new events"); + } }; let session = if let Some(session) = self.sessions.get_mut(&stream_id) { session } else { - self.add_sessions()?; + self.add_sessions(); let Some(session) = self.sessions.get_mut(&stream_id) else { return Err(Error::explain( InternalError, @@ -477,45 +538,56 @@ impl Http3Poll { } } - fn sessions_housekeeping(&mut self) -> Result<()> { - self.drop_sessions()?; + fn sessions_housekeeping(&mut self) { + self.drop_sessions(); self.add_sessions() } - fn add_sessions(&mut self) -> Result<()> { + fn add_sessions(&mut self) { let mut add_sessions = self.add_sessions.lock(); while let Some((stream_id, sender)) = add_sessions.pop_front() { - if let Some(_sender) = self.sessions.insert(stream_id, sender) { - debug_assert!(false, "stream id {} existed", stream_id); - return Err(Error::explain( - InternalError, - 
format!("stream id {} was already present in sessions", stream_id), - )); - } else { - debug!( - "connection {:?} added stream id {} to sessions", - self.conn_io.id, stream_id - ) + match self.sessions.insert(stream_id, sender) { + Some(_) => { + warn!( + "connection {:?} stream {} was already present in sessions", + self.conn_id(), + stream_id + ); + debug_assert!(false) + } + None => { + debug!( + "connection {:?} added stream id {} to sessions", + self.conn_io.id, stream_id + ) + } } } - Ok(()) } - fn drop_sessions(&mut self) -> Result<()> { + fn drop_sessions(&mut self) { let mut drop_sessions = self.drop_sessions.lock(); while let Some(stream_id) = drop_sessions.pop_front() { - if let Some(_sender) = self.sessions.remove(&stream_id) { - debug!( - "connection {:?} removed stream id {} from sessions", - self.conn_io.id, stream_id - ) - } else { - return Err(Error::explain( - InternalError, - format!("failed to remove session with stream id {}", stream_id), - )); + match self.sessions.remove(&stream_id) { + None => { + warn!( + "connection {:?} failed to remove stream {} from sessions", + self.conn_id(), + stream_id + ); + debug_assert!(false) + } + Some(_) => { + debug!( + "connection {:?} removed stream id {} from sessions", + self.conn_io.id, stream_id + ) + } } } - Ok(()) + } + + fn conn_id(&self) -> &ConnectionId<'static> { + &self.conn_io.id } } diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index 273ee4e7d..ae671dab3 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -24,7 +24,7 @@ use crate::protocols::http::v3::{ H3_SESSION_EVENTS_CHANNEL_SIZE, }; use crate::protocols::http::HttpTask; -use crate::protocols::l4::quic::Connection; +use crate::protocols::l4::quic::{handle_connection_errors, Connection}; use crate::protocols::{Digest, SocketAddr, Stream}; use bytes::Bytes; use http::uri::PathAndQuery; @@ -178,7 +178,7 @@ impl H3Connection { 
} } - async fn sessions_housekeeping(&mut self) { + fn sessions_housekeeping(&mut self) { let mut drop_sessions = self.drop_sessions.lock(); // housekeeping finished sessions @@ -376,51 +376,41 @@ impl H3Session { debug!("H3 connection {:?} no events available", conn.conn_id()); // TODO: in case PriorityUpdate was triggered call take_priority_update() here - conn.sessions_housekeeping().await; + conn.sessions_housekeeping(); - let is_closed; let timeout; { let qconn = conn.conn_io.quic.lock(); - is_closed = qconn.is_closed() + let is_closed = qconn.is_closed() || !(qconn.is_established() || qconn.is_in_early_data()); if is_closed { - if let Some(e) = qconn.peer_error() { - debug!( - "connection {:?} peer error reason: {}", - conn.conn_id(), - String::from_utf8_lossy(e.reason.as_slice()).to_string() - ); - } - if let Some(e) = qconn.local_error() { - debug!( - "connection {:?} local error reason: {}", + if !conn.sessions.is_empty() { + warn!( + "H3 connection {:?} closed with open {} sessions", conn.conn_id(), - String::from_utf8_lossy(e.reason.as_slice()).to_string() + conn.sessions.len() ); + } else { + debug!("H3 connection {:?} closed", conn.conn_id()); } - } - timeout = qconn.timeout(); - } + // send close in case it is a local error + conn.conn_io.tx_notify.notify_waiters(); - if is_closed { - if !conn.sessions.is_empty() { - warn!( - "H3 connection {:?} closed with open {} sessions", + return match handle_connection_errors( conn.conn_id(), - conn.sessions.len() - ); - } else { - debug!("H3 connection {:?} closed", conn.conn_id()); + qconn.local_error(), + qconn.peer_error(), + ) { + Ok(()) => Ok(None), // closes the connection + Err(e) => Err(e), + }; } - - conn.conn_io.tx_notify.notify_waiters(); - return Ok(None); + timeout = qconn.timeout(); } // race for new data on connection or timeout tokio::select! 
{ - _data = conn.conn_io.rx_notify.notified() => {} + _data = conn.conn_io.rx_notify.notified() => { /* continue */ } _timedout = async { if let Some(timeout) = timeout { debug!("connection {:?} timeout {:?}", conn.conn_id(), timeout); @@ -430,7 +420,7 @@ impl H3Session { tokio::time::sleep(Duration::MAX).await } } => { - conn.sessions_housekeeping().await; + conn.sessions_housekeeping(); if !conn.sessions.is_empty() { warn!("connection {:?} timed out with {} open sessions", conn.conn_id(), conn.sessions.len()); @@ -443,6 +433,7 @@ impl H3Session { } } } + continue 'poll; } Err(e) => { // If an error occurs while processing data, the connection is closed with @@ -451,9 +442,13 @@ impl H3Session { // send the close() event conn.conn_io.tx_notify.notify_waiters(); - error!("H3 connection closed with error {:?}.", e); + error!( + "H3 connection {:?} closed with error {:?}.", + conn.conn_id(), + e + ); return Err(e) - .explain_err(H3Error, |_| "while accepting new downstream requests"); + .explain_err(H3Error, |_| "failed to poll H3 connection for new events"); } } } diff --git a/pingora-core/src/protocols/l4/quic/connector.rs b/pingora-core/src/protocols/l4/quic/connector.rs index 9b6efbb24..c545f901d 100644 --- a/pingora-core/src/protocols/l4/quic/connector.rs +++ b/pingora-core/src/protocols/l4/quic/connector.rs @@ -2,7 +2,7 @@ use crate::protocols::l4::quic::Connection; use crate::protocols::l4::quic::{ detect_gso_pacing, Crypto, QuicHttp3Configs, SocketDetails, MAX_IPV6_BUF_SIZE, }; -use log::{debug, trace}; +use log::{debug, error, trace}; use parking_lot::Mutex; use pingora_error::{ErrorType, OrErr, Result}; use quiche::Connection as QuicheConnection; @@ -82,18 +82,18 @@ impl ConnectionRx { pub async fn start(self) -> Result<()> { let socket = self.socket_details.io; let local_addr = self.socket_details.local_addr; - let id = self.connection_id; + let conn_id = self.connection_id; // TODO: support ip switching on local & peer address // would require socket 
re-binding let mut buf = [0u8; MAX_IPV6_BUF_SIZE]; - debug!("connection {:?} rx read", id); + debug!("connection {:?} rx read", conn_id); 'read: loop { let (size, recv_info) = match socket.try_recv_from(&mut buf) { Ok((size, from)) => { trace!( "connection {:?} network received from={} length={}", - id, + conn_id, from, size ); @@ -122,11 +122,18 @@ impl ConnectionRx { let mut conn = self.connection.lock(); match conn.recv(&mut buf[..size], recv_info) { Ok(_size) => { - debug!("connection {:?} received {}", id, size); + debug!("connection {:?} received {}", conn_id, size); self.tx_notify.notify_waiters(); self.rx_notify.notify_waiters(); } Err(e) => { + // If an error occurs while processing data, the connection is closed with + // the appropriate error code, using the transport’s close() method. + + // send the close() event + self.tx_notify.notify_waiters(); + + error!("H3 connection {:?} closed with error {:?}.", conn_id, e); return Err(e).explain_err(ErrorType::ReadError, |_| { "failed to receive data from socket on connection" }); diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index 1a443df8c..0e0730fcd 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -10,6 +10,7 @@ use ring::rand::SystemRandom; use std::fmt::{Debug, Formatter}; use std::net::SocketAddr; +use pingora_error::ErrorType::ConnectionClosed; use std::os::fd::{AsRawFd, RawFd}; use std::pin::Pin; use std::sync::Arc; @@ -654,3 +655,29 @@ impl AsyncRead for Connection { todo!() } } + +pub(crate) fn handle_connection_errors( + conn_id: &ConnectionId<'_>, + local_error: Option<&quiche::ConnectionError>, + peer_error: Option<&quiche::ConnectionError>, +) -> Result<()> { + if let Some(e) = local_error { + error!( + "connection {:?} local error {}", + conn_id, + String::from_utf8_lossy(e.reason.as_slice()).to_string() + ); + return Err(e).explain_err(ConnectionClosed, |_| "local connection error"); + 
} + + if let Some(e) = peer_error { + error!( + "connection {:?} peer error {}", + conn_id, + String::from_utf8_lossy(e.reason.as_slice()).to_string() + ); + return Err(e).explain_err(ConnectionClosed, |_| "peer connection error"); + } + + Ok(()) +} diff --git a/pingora-core/src/protocols/tls/quic/client.rs b/pingora-core/src/protocols/tls/quic/client.rs index da5af29d7..7aa8a7d4a 100644 --- a/pingora-core/src/protocols/tls/quic/client.rs +++ b/pingora-core/src/protocols/tls/quic/client.rs @@ -3,8 +3,7 @@ use crate::protocols::l4::quic::connector::{ ConnectionRx, OutgoingEstablishedState, OutgoingHandshakeState, }; use crate::protocols::l4::quic::id_token::generate_outgoing_cid; -use crate::protocols::l4::quic::{Connection, ConnectionTx, TxStats}; -use crate::protocols::tls::quic::handle_connection_errors; +use crate::protocols::l4::quic::{handle_connection_errors, Connection, ConnectionTx, TxStats}; use crate::protocols::IO; use crate::upstreams::peer::Peer; use log::{info, trace}; @@ -141,7 +140,7 @@ where conn.local_error() ); - handle_connection_errors(conn_id.clone(), conn.peer_error(), conn.local_error())?; + handle_connection_errors(&conn_id, conn.peer_error(), conn.local_error())?; if conn.is_established() { // send response packets tx_notify.notify_waiters(); diff --git a/pingora-core/src/protocols/tls/quic/mod.rs b/pingora-core/src/protocols/tls/quic/mod.rs index 136f1d0f7..c07f47e0f 100644 --- a/pingora-core/src/protocols/tls/quic/mod.rs +++ b/pingora-core/src/protocols/tls/quic/mod.rs @@ -1,33 +1,2 @@ -use log::error; -use pingora_error::ErrorType::HandshakeError; -use pingora_error::OrErr; -use quiche::ConnectionId; - pub mod client; pub mod server; - -fn handle_connection_errors( - conn_id: ConnectionId<'_>, - local_error: Option<&quiche::ConnectionError>, - peer_error: Option<&quiche::ConnectionError>, -) -> pingora_error::Result<()> { - if let Some(e) = local_error { - error!( - "connection {:?} local error reason: {}", - conn_id, - 
String::from_utf8_lossy(e.reason.as_slice()).to_string() - ); - return Err(e).explain_err(HandshakeError, |_| "local error during handshake"); - } - - if let Some(e) = peer_error { - error!( - "connection {:?} peer error reason: {}", - conn_id, - String::from_utf8_lossy(e.reason.as_slice()).to_string() - ); - return Err(e).explain_err(HandshakeError, |_| "peer error during handshake"); - } - - Ok(()) -} diff --git a/pingora-core/src/protocols/tls/quic/server.rs b/pingora-core/src/protocols/tls/quic/server.rs index 237c38dbb..1c6db9862 100644 --- a/pingora-core/src/protocols/tls/quic/server.rs +++ b/pingora-core/src/protocols/tls/quic/server.rs @@ -2,9 +2,10 @@ use crate::protocols::l4::quic::id_token::{mint_token, validate_token}; use crate::protocols::l4::quic::listener::{ EstablishedHandle, HandshakeResponse, IncomingEstablishedState, IncomingHandshakeState, }; -use crate::protocols::l4::quic::{Connection, ConnectionTx, TxStats, MAX_IPV6_QUIC_DATAGRAM_SIZE}; +use crate::protocols::l4::quic::{ + handle_connection_errors, Connection, ConnectionTx, TxStats, MAX_IPV6_QUIC_DATAGRAM_SIZE, +}; use crate::protocols::l4::stream::Stream as L4Stream; -use crate::protocols::tls::quic::handle_connection_errors; use crate::protocols::ConnectionState; use log::{debug, error, trace, warn}; use parking_lot::Mutex; @@ -274,21 +275,20 @@ async fn handshake_incoming( conn.local_error() ); - handle_connection_errors(conn_id.clone(), conn.peer_error(), conn.local_error())?; + handle_connection_errors(&conn_id, conn.peer_error(), conn.local_error())?; } - let connection_id = conn_id; let connection = Arc::new(Mutex::new(conn)); let tx_notify = Arc::new(Notify::new()); let rx_notify = Arc::new(Notify::new()); debug!( "connection {:?} handshake successful, udp_rx {}", - connection_id, + conn_id, udp_rx.len() ); let handle = EstablishedHandle { - connection_id: connection_id.clone(), + connection_id: conn_id.clone(), connection: connection.clone(), rx_notify: rx_notify.clone(), tx_notify: 
tx_notify.clone(), @@ -301,7 +301,7 @@ async fn handshake_incoming( let tx = ConnectionTx { socket_details: socket_details.clone(), - connection_id: connection_id.clone(), + connection_id: conn_id.clone(), connection: connection.clone(), tx_notify: tx_notify.clone(), @@ -309,7 +309,7 @@ async fn handshake_incoming( }; let e_state = IncomingEstablishedState { - connection_id: connection_id.clone(), + connection_id: conn_id.clone(), connection: connection.clone(), http3_config: configs.http3().clone(), diff --git a/pingora-core/tests/test_basic.rs b/pingora-core/tests/test_basic.rs index ff71030c5..c8dcf1297 100644 --- a/pingora-core/tests/test_basic.rs +++ b/pingora-core/tests/test_basic.rs @@ -30,7 +30,6 @@ use h3i::config::Config; use h3i::frame::H3iFrame; use h3i::quiche::h3::frame::Frame; use h3i::quiche::h3::Header; -use pingora_core::prelude::HttpPeer; #[tokio::test] async fn test_http() { From 62300711965edc32f1c0e22fce0a28f48f58ebc9 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Sat, 25 Jan 2025 17:18:34 +0100 Subject: [PATCH 37/52] unify timeout & error handling on H3 connection --- pingora-core/src/connectors/http/v3.rs | 21 +-- pingora-core/src/protocols/http/v3/client.rs | 177 ++++++------------ pingora-core/src/protocols/http/v3/mod.rs | 179 ++++++++++++++++++- pingora-core/src/protocols/http/v3/server.rs | 127 ++----------- 4 files changed, 248 insertions(+), 256 deletions(-) diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index 69e88ac8c..9177791b8 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -55,7 +55,7 @@ impl ConnectionRef { impl ConnectionRef { pub(crate) fn conn_id(&self) -> &ConnectionId<'_> { - &self.0.conn_io.id + &self.0.conn_io.conn_id() } pub(crate) fn conn_io(&self) -> &ConnectionIo { @@ -131,7 +131,10 @@ impl Drop for ConnectionRefInner { fn drop(&mut self) { if !self.h3poll_task.is_finished() { self.h3poll_task.abort(); - 
debug!("connection {:?} stopped Http3Poll task", self.conn_io.id) + debug!( + "connection {:?} stopped Http3Poll task", + self.conn_io.conn_id() + ) } } } @@ -310,11 +313,7 @@ impl ConnectionRef { } pub fn more_streams_allowed(&self) -> bool { - let qconn = self.0.conn_io.quic.lock(); - qconn.is_established() - && !qconn.is_closed() - && !qconn.is_draining() - && qconn.peer_streams_left_bidi() > 0 + self.conn_io().more_streams_available() } } @@ -351,13 +350,7 @@ async fn handshake(mut stream: Stream, max_streams: usize) -> Result (stream, ev), - Err(h3::Error::Done) => { - debug!("H3 connection {:?} no events available", self.conn_id()); - - self.sessions_housekeeping(); - - let timeout; - { - let qconn = self.conn_io.quic.lock(); - let is_closed = qconn.is_closed() - || !(qconn.is_established() || qconn.is_in_early_data()); - if is_closed { - if !self.sessions.is_empty() { - warn!( - "H3 connection {:?} closed with open {} sessions", - self.conn_id(), - self.sessions.len() - ); - } else { - debug!("H3 connection {:?} closed", self.conn_id()); - } - - // send close in case it is a local error - self.conn_io.tx_notify.notify_waiters(); - - return handle_connection_errors( - self.conn_id(), - qconn.local_error(), - qconn.peer_error(), - ); - } - timeout = qconn.timeout(); - } - - tokio::select! 
{ - _data = self.conn_io.rx_notify.notified() => { /* continue */ } - _timedout = async { - if let Some(timeout) = timeout { - debug!("connection {:?} timeout {:?}", self.conn_id(), timeout); - tokio::time::sleep(timeout).await - } else { - debug!("connection {:?} timeout not present", self.conn_id()); - tokio::time::sleep(Duration::MAX).await - } - } => { - self.sessions_housekeeping(); - if !self.sessions.is_empty() { - warn!("connection {:?} timed out with {} open sessions", - self.conn_id(), self.sessions.len()); - } - let mut qconn = self.conn_io.quic.lock(); - // closes connection - qconn.on_timeout(); - if let Some(timeout) = timeout { - debug!("connection {:?} timed out {:?}", self.conn_id(), timeout); - } - } - } - continue 'poll; - } Err(e) => { - // If an error occurs while processing data, the connection is closed with - // the appropriate error code, using the transport’s close() method. - - // send the close() event - self.conn_io.tx_notify.notify_waiters(); - - error!( - "H3 connection {:?} closed with error {:?}.", - self.conn_io.id, e - ); - return Err(e) - .explain_err(H3Error, |_| "failed to poll H3 connection for new events"); + let conn_id = self.conn_id().clone(); + + let drop_sessions = &self.drop_sessions.clone(); + let fn_drop_sessions = |sessions: &mut StreamIdHashMap>| { + housekeeping_drop_sessions(&conn_id, sessions, drop_sessions) + }; + + let add_sessions = &self.add_sessions.clone(); + let fn_add_sessions = |sessions: &mut StreamIdHashMap>| { + housekeeping_add_sessions(&conn_id, sessions, add_sessions) + }; + + let conn_alive = self + .conn_io + .error_or_timeout_data_race( + e, + &mut self.sessions, + fn_drop_sessions, + fn_add_sessions, + ) + .await?; + if conn_alive { + continue 'poll; + } else { + break 'poll Ok(()); + } } }; let session = if let Some(session) = self.sessions.get_mut(&stream_id) { session } else { - self.add_sessions(); + let conn_id = self.conn_id().clone(); + housekeeping_add_sessions(&conn_id, &mut 
self.sessions, &self.add_sessions); let Some(session) = self.sessions.get_mut(&stream_id) else { return Err(Error::explain( InternalError, @@ -538,56 +493,32 @@ impl Http3Poll { } } - fn sessions_housekeeping(&mut self) { - self.drop_sessions(); - self.add_sessions() + fn conn_id(&self) -> &ConnectionId<'static> { + &self.conn_io.id } +} - fn add_sessions(&mut self) { - let mut add_sessions = self.add_sessions.lock(); - while let Some((stream_id, sender)) = add_sessions.pop_front() { - match self.sessions.insert(stream_id, sender) { - Some(_) => { - warn!( - "connection {:?} stream {} was already present in sessions", - self.conn_id(), - stream_id - ); - debug_assert!(false) - } - None => { - debug!( - "connection {:?} added stream id {} to sessions", - self.conn_io.id, stream_id - ) - } +fn housekeeping_add_sessions( + conn_id: &ConnectionId<'_>, + sessions: &mut StreamIdHashMap>, + add_sessions: &Mutex)>>, +) { + let mut add_sessions = add_sessions.lock(); + while let Some((stream_id, sender)) = add_sessions.pop_front() { + match sessions.insert(stream_id, sender) { + Some(_) => { + warn!( + "connection {:?} stream {} was already present in sessions", + conn_id, stream_id + ); + debug_assert!(false) } - } - } - - fn drop_sessions(&mut self) { - let mut drop_sessions = self.drop_sessions.lock(); - while let Some(stream_id) = drop_sessions.pop_front() { - match self.sessions.remove(&stream_id) { - None => { - warn!( - "connection {:?} failed to remove stream {} from sessions", - self.conn_id(), - stream_id - ); - debug_assert!(false) - } - Some(_) => { - debug!( - "connection {:?} removed stream id {} from sessions", - self.conn_io.id, stream_id - ) - } + None => { + debug!( + "connection {:?} added stream id {} to sessions", + conn_id, stream_id + ) } } } - - fn conn_id(&self) -> &ConnectionId<'static> { - &self.conn_io.id - } } diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index cc40eafbc..547dc2c08 100644 --- 
a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -14,23 +14,28 @@ //! HTTP/3 implementation -use crate::protocols::l4::quic::MAX_IPV6_QUIC_DATAGRAM_SIZE; +use crate::protocols::http::v3::nohash::StreamIdHashMap; +use crate::protocols::l4::quic::connector::OutgoingEstablishedState; +use crate::protocols::l4::quic::listener::IncomingEstablishedState; +use crate::protocols::l4::quic::{handle_connection_errors, MAX_IPV6_QUIC_DATAGRAM_SIZE}; use bytes::{BufMut, Bytes, BytesMut}; use http::uri::{Authority, Scheme}; use http::{HeaderMap, HeaderName, HeaderValue, Request, Uri, Version}; -use log::{debug, trace, warn}; +use log::{debug, error, trace, warn}; use parking_lot::Mutex; use pingora_error::ErrorType::{H3Error, InvalidHTTPHeader, ReadError, WriteError}; use pingora_error::{Error, ErrorType, OrErr, Result}; use pingora_http::{RequestHeader, ResponseHeader}; use quiche::h3::{Event, Header, NameValue}; -use quiche::{ConnectionId, Shutdown}; +use quiche::{h3, ConnectionId, Shutdown}; use std::cmp; +use std::collections::VecDeque; use std::fmt::Debug; use std::future::Future; use std::pin::Pin; use std::sync::Arc; -use tokio::sync::mpsc::Receiver; +use std::time::Duration; +use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::Notify; pub const H3_SESSION_EVENTS_CHANNEL_SIZE: usize = 256; @@ -44,24 +49,60 @@ pub mod server; #[derive(Clone)] pub(crate) struct ConnectionIo { - pub(crate) id: ConnectionId<'static>, + id: ConnectionId<'static>, - pub(crate) quic: Arc>, - pub(crate) http3: Arc>, + quic: Arc>, + http3: Arc>, // receive notification on Quic recv, used to check stream capacity // as it only increases after MaxData or MaxStreamData frame was received - pub(crate) rx_notify: Arc, + rx_notify: Arc, // trigger Quic send, continue ConnectionTx write loop - pub(crate) tx_notify: Arc, + tx_notify: Arc, +} + +impl From<(&OutgoingEstablishedState, h3::Connection)> for ConnectionIo { + fn from((state, h3conn): 
(&OutgoingEstablishedState, h3::Connection)) -> Self { + Self { + id: state.connection_id.clone(), + quic: state.connection.clone(), + http3: Arc::new(Mutex::new(h3conn)), + rx_notify: state.rx_notify.clone(), + tx_notify: state.tx_notify.clone(), + } + } +} + +impl From<(&IncomingEstablishedState, h3::Connection)> for ConnectionIo { + fn from((state, h3conn): (&IncomingEstablishedState, h3::Connection)) -> Self { + Self { + id: state.connection_id.clone(), + quic: state.connection.clone(), + http3: Arc::new(Mutex::new(h3conn)), + rx_notify: state.rx_notify.clone(), + tx_notify: state.tx_notify.clone(), + } + } } impl ConnectionIo { + pub(crate) fn conn_id(&self) -> &ConnectionId<'static> { + &self.id + } + pub(crate) fn is_shutting_down(&self) -> bool { let qconn = self.quic.lock(); qconn.is_draining() } + pub(crate) fn more_streams_available(&self) -> bool { + let qconn = self.quic.lock(); + qconn.is_established() + && !qconn.is_closed() + && !qconn.is_draining() + && qconn.peer_streams_left_bidi() > 0 + } + fn capacity( &self, stream_id: u64, @@ -214,6 +255,126 @@ impl ConnectionIo { self.tx_notify.notify_waiters() } + + async fn error_or_timeout_data_race( + &self, + error: h3::Error, + sessions: &mut StreamIdHashMap>, + mut drop_sessions: D, + mut add_sessions: A, + ) -> Result + where + D: FnMut(&mut StreamIdHashMap>), + A: FnMut(&mut StreamIdHashMap>), + { + match error { + h3::Error::Done => { + debug!("H3 connection {:?} no events available", self.conn_id()); + // TODO: in case PriorityUpdate was triggered call take_priority_update() here + + add_sessions(sessions); + drop_sessions(sessions); + + let timeout; + { + let qconn = self.quic.lock(); + let is_closed = + qconn.is_closed() || !(qconn.is_established() || qconn.is_in_early_data()); + if is_closed { + if !sessions.is_empty() { + warn!( + "H3 connection {:?} closed with open {} sessions", + self.conn_id(), + sessions.len() + ); + } else { + debug!("H3 connection {:?} closed", self.conn_id()); + } + + 
// send close in case it is a local error + self.tx_notify.notify_waiters(); + + return match handle_connection_errors( + self.conn_id(), + qconn.local_error(), + qconn.peer_error(), + ) { + Ok(()) => Ok(false), // signal connection close + Err(e) => Err(e), + }; + } + timeout = qconn.timeout(); + } + + // race for new data on connection or timeout + tokio::select! { + _data = self.rx_notify.notified() => { /* continue */ } + _timedout = async { + if let Some(timeout) = timeout { + debug!("connection {:?} timeout {:?}", self.conn_id(), timeout); + tokio::time::sleep(timeout).await + } else { + debug!("connection {:?} timeout not present", self.conn_id()); + tokio::time::sleep(Duration::MAX).await + } + } => { + drop_sessions(sessions); + if !sessions.is_empty() { + warn!("connection {:?} timed out with {} open sessions", + self.conn_id(), sessions.len()); + } + let mut qconn = self.quic.lock(); + // closes connection + qconn.on_timeout(); + if let Some(timeout) = timeout { + debug!("connection {:?} timed out {:?}", self.conn_id(), timeout); + } + } + } + Ok(true) // signal continue + } + _ => { + // If an error occurs while processing data, the connection is closed with + // the appropriate error code, using the transport’s close() method. 
+ + // send the close() event + self.tx_notify.notify_waiters(); + + error!( + "H3 connection {:?} closed with error {:?}.", + self.conn_id(), + error + ); + Err(error).explain_err(H3Error, |_| "failed to poll H3 connection for new events") + } + } + } +} + +fn housekeeping_drop_sessions( + conn_id: &ConnectionId<'_>, + sessions: &mut StreamIdHashMap>, + drop_sessions: &Mutex>, +) { + let mut drop_sessions = drop_sessions.lock(); + + // housekeeping finished sessions + while let Some(stream_id) = drop_sessions.pop_front() { + match sessions.remove(&stream_id) { + None => { + warn!( + "connection {:?} failed to remove stream {} from sessions", + conn_id, stream_id + ) + } + Some(_) => { + debug!( + "connection {:?} stream {} removed from sessions", + conn_id, stream_id + ); + } + }; + } } fn event_to_request_headers(list: &Vec
) -> Result { diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index ae671dab3..7915050e9 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -20,11 +20,11 @@ use crate::protocols::http::v1::client::http_req_header_to_wire; use crate::protocols::http::v3::nohash::StreamIdHashMap; use crate::protocols::http::v3::{ data_finished_event, event_to_request_headers, header_size, headermap_to_headervec, - response_headers_to_event, ConnectionIo, H3_SESSION_DROP_DEQUE_INITIAL_CAPACITY, - H3_SESSION_EVENTS_CHANNEL_SIZE, + housekeeping_drop_sessions, response_headers_to_event, ConnectionIo, + H3_SESSION_DROP_DEQUE_INITIAL_CAPACITY, H3_SESSION_EVENTS_CHANNEL_SIZE, }; use crate::protocols::http::HttpTask; -use crate::protocols::l4::quic::{handle_connection_errors, Connection}; +use crate::protocols::l4::quic::Connection; use crate::protocols::{Digest, SocketAddr, Stream}; use bytes::Bytes; use http::uri::PathAndQuery; @@ -73,14 +73,10 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

{ return Err(Error::explain( @@ -177,30 +173,6 @@ impl H3Connection { Ok(()) } } - - fn sessions_housekeeping(&mut self) { - let mut drop_sessions = self.drop_sessions.lock(); - - // housekeeping finished sessions - while let Some(stream_id) = drop_sessions.pop_front() { - match self.sessions.remove(&stream_id) { - None => { - warn!( - "connection {:?} failed to remove stream {} from sessions", - self.conn_id(), - stream_id - ) - } - Some(_) => { - debug!( - "connection {:?} stream {} removed from sessions", - self.conn_id(), - stream_id - ); - } - }; - } - } } /// HTTP/3 server session @@ -372,84 +344,19 @@ impl H3Session { } } } - Err(h3::Error::Done) => { - debug!("H3 connection {:?} no events available", conn.conn_id()); - // TODO: in case PriorityUpdate was triggered call take_priority_update() here - - conn.sessions_housekeeping(); - - let timeout; - { - let qconn = conn.conn_io.quic.lock(); - let is_closed = qconn.is_closed() - || !(qconn.is_established() || qconn.is_in_early_data()); - if is_closed { - if !conn.sessions.is_empty() { - warn!( - "H3 connection {:?} closed with open {} sessions", - conn.conn_id(), - conn.sessions.len() - ); - } else { - debug!("H3 connection {:?} closed", conn.conn_id()); - } - // send close in case it is a local error - conn.conn_io.tx_notify.notify_waiters(); - - return match handle_connection_errors( - conn.conn_id(), - qconn.local_error(), - qconn.peer_error(), - ) { - Ok(()) => Ok(None), // closes the connection - Err(e) => Err(e), - }; - } - timeout = qconn.timeout(); - } + Err(e) => { + let conn_id = conn.conn_id().clone(); + let drop_sessions = &conn.drop_sessions.clone(); - // race for new data on connection or timeout - tokio::select! 
{ - _data = conn.conn_io.rx_notify.notified() => { /* continue */ } - _timedout = async { - if let Some(timeout) = timeout { - debug!("connection {:?} timeout {:?}", conn.conn_id(), timeout); - tokio::time::sleep(timeout).await - } else { - debug!("connection {:?} timeout not present", conn.conn_id()); - tokio::time::sleep(Duration::MAX).await - } - } => { - conn.sessions_housekeeping(); - if !conn.sessions.is_empty() { - warn!("connection {:?} timed out with {} open sessions", - conn.conn_id(), conn.sessions.len()); - } - let mut qconn = conn.conn_io.quic.lock(); - // closes connection - qconn.on_timeout(); - if let Some(timeout) = timeout { - debug!("connection {:?} timed out {:?}", conn.conn_id(), timeout); - } - } - } + let fn_drop_sessions = |sessions: &mut StreamIdHashMap>| { + housekeeping_drop_sessions(&conn_id, sessions, drop_sessions) + }; + + conn.conn_io + .error_or_timeout_data_race(e, &mut conn.sessions, fn_drop_sessions, |_| {}) + .await?; continue 'poll; } - Err(e) => { - // If an error occurs while processing data, the connection is closed with - // the appropriate error code, using the transport’s close() method. 
- - // send the close() event - conn.conn_io.tx_notify.notify_waiters(); - - error!( - "H3 connection {:?} closed with error {:?}.", - conn.conn_id(), - e - ); - return Err(e) - .explain_err(H3Error, |_| "failed to poll H3 connection for new events"); - } } } } From 92ded9bf76c4949df7147cce1c3943866e920fcd Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Sat, 25 Jan 2025 17:45:35 +0100 Subject: [PATCH 38/52] doc, license headers, fmt & clippy --- pingora-core/src/apps/mod.rs | 4 +- pingora-core/src/connectors/http/mod.rs | 2 +- pingora-core/src/connectors/http/v1.rs | 3 + pingora-core/src/connectors/http/v2.rs | 4 +- pingora-core/src/connectors/http/v3.rs | 125 +++++++++--------- pingora-core/src/connectors/l4.rs | 19 +-- pingora-core/src/protocols/http/mod.rs | 1 + pingora-core/src/protocols/http/server.rs | 2 +- pingora-core/src/protocols/http/v3/client.rs | 24 +++- pingora-core/src/protocols/http/v3/mod.rs | 37 +++--- pingora-core/src/protocols/http/v3/nohash.rs | 5 +- pingora-core/src/protocols/http/v3/server.rs | 42 +++--- .../src/protocols/l4/quic/connector.rs | 31 ++++- .../src/protocols/l4/quic/id_token.rs | 10 +- .../src/protocols/l4/quic/listener.rs | 36 +++-- pingora-core/src/protocols/l4/quic/mod.rs | 93 ++++++++----- pingora-core/src/protocols/l4/quic/sendto.rs | 9 +- pingora-core/src/protocols/tls/quic/client.rs | 32 +++-- pingora-core/src/protocols/tls/quic/mod.rs | 16 +++ pingora-core/src/protocols/tls/quic/server.rs | 32 +++-- pingora-core/src/upstreams/peer.rs | 1 + pingora-proxy/src/lib.rs | 3 + pingora/src/lib.rs | 4 +- 23 files changed, 343 insertions(+), 192 deletions(-) diff --git a/pingora-core/src/apps/mod.rs b/pingora-core/src/apps/mod.rs index 21c099099..644fa6a2b 100644 --- a/pingora-core/src/apps/mod.rs +++ b/pingora-core/src/apps/mod.rs @@ -93,7 +93,7 @@ pub trait HttpServerApp { /// every time a new HTTP/3 **connection** needs to be established. /// /// A `None` means to use the built-in default options. 
See [`server::H2Options`] for more details. - fn h3_options(&self) -> Option<&h3_server::H3Options> { + fn h3_options(&self) -> Option<&h3_server::Http3Options> { None } @@ -245,7 +245,7 @@ where }; return None; } - h3_stream = h3_server::H3Session::from_h3_conn(&mut h3_conn, digest.clone()) => h3_stream + h3_stream = h3_server::Http3Session::from_h3_conn(&mut h3_conn, digest.clone()) => h3_stream }; let h3_stream = match h3_stream { diff --git a/pingora-core/src/connectors/http/mod.rs b/pingora-core/src/connectors/http/mod.rs index a92f2a644..d9d746e17 100644 --- a/pingora-core/src/connectors/http/mod.rs +++ b/pingora-core/src/connectors/http/mod.rs @@ -79,7 +79,7 @@ impl Connector { } } let session = self.h2.new_http_session(peer).await?; - return Ok((session, false)); + Ok((session, false)) } /* // FIXME: correctly route HTTP3 diff --git a/pingora-core/src/connectors/http/v1.rs b/pingora-core/src/connectors/http/v1.rs index ffa6df20f..3eb5c16d4 100644 --- a/pingora-core/src/connectors/http/v1.rs +++ b/pingora-core/src/connectors/http/v1.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +//! Connecting to HTTP 1.x servers + use crate::connectors::{ConnectorOptions, TransportConnector}; use crate::protocols::http::v1::client::HttpSession; use crate::upstreams::peer::Peer; @@ -19,6 +21,7 @@ use crate::upstreams::peer::Peer; use pingora_error::Result; use std::time::Duration; +/// HTTP 1.x connector pub struct Connector { transport: TransportConnector, } diff --git a/pingora-core/src/connectors/http/v2.rs b/pingora-core/src/connectors/http/v2.rs index d9b9dafbe..bbec0cb44 100644 --- a/pingora-core/src/connectors/http/v2.rs +++ b/pingora-core/src/connectors/http/v2.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +//! 
Connecting to HTTP 2 servers + use super::HttpSession; use crate::connectors::{ConnectorOptions, TransportConnector}; use crate::protocols::http::v1::client::HttpSession as Http1Session; @@ -212,7 +214,7 @@ impl InUsePool { const DEFAULT_POOL_SIZE: usize = 128; -/// Http2 connector +/// HTTP 2 connector pub struct Connector { // just for creating connections, the Stream of h2 should be reused transport: TransportConnector, diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index 9177791b8..2216a09d8 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -1,10 +1,26 @@ +// Copyright 2024 Cloudflare, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
Connecting to HTTP 3 servers + use super::HttpSession; use crate::connectors::http::InUsePool; use crate::connectors::{ConnectorOptions, TransportConnector}; use crate::protocols::http::v3::client::{Http3Poll, Http3Session}; use crate::protocols::http::v3::ConnectionIo; -use crate::protocols::l4::quic::{Connection, Crypto}; +use crate::protocols::l4::quic::Connection; use crate::protocols::{Digest, Stream, UniqueID, UniqueIDType}; use crate::upstreams::peer::{Peer, ALPN}; use log::debug; @@ -22,40 +38,44 @@ use tokio::sync::{mpsc, watch}; use tokio::task::JoinHandle; // FIXME: ConnectorOptions contains CA file path from ServerConfig +/// a ref to an established HTTP 3 connection #[derive(Clone)] pub(crate) struct ConnectionRef(Arc); -impl ConnectionRef { - pub fn new( - l4_stream: Stream, - conn_io: ConnectionIo, - digest: Digest, - add_sessions: Arc)>>>, - drop_sessions: Arc>>, - idle_close: watch::Receiver, - max_streams: usize, - h3poll_task: JoinHandle>, - ) -> Self { - Self(Arc::new(ConnectionRefInner { - l4_stream, - conn_io, - - digest, - max_streams, - current_streams: AtomicUsize::new(0), - release_lock: Arc::new(Default::default()), - - add_sessions, - drop_sessions, - idle_close, - h3poll_task, - })) - } +/// corresponds to an established HTTP 3 connection +pub(crate) struct ConnectionRefInner { + /// avoid dropping the [`Stream`] & used for [`UniqueIDType`] + l4_stream: Stream, + + /// resources required for Http3, Quic & network IO + conn_io: ConnectionIo, + + /// connection [`Digest`] + digest: Digest, + + /// max. 
concurrent streams this connection is allowed to create + max_streams: usize, + + /// how many concurrent streams already active + current_streams: AtomicUsize, + + /// lock is used during moving the connection across pools + release_lock: Arc>, + + /// add session to active sessions in Http3Poll task + add_sessions: Arc)>>>, + /// remove session from active sessions in Http3Poll task + drop_sessions: Arc>>, + /// watch for idle pool timeouts + idle_close: watch::Receiver, + + /// the background task handle polling the HTTP3 3 connection + h3poll_task: JoinHandle>, } impl ConnectionRef { pub(crate) fn conn_id(&self) -> &ConnectionId<'_> { - &self.0.conn_io.conn_id() + self.0.conn_io.conn_id() } pub(crate) fn conn_io(&self) -> &ConnectionIo { @@ -98,35 +118,6 @@ impl ConnectionRef { } } -pub(crate) struct ConnectionRefInner { - // avoid dropping stream, & used for UniqueIDType - l4_stream: Stream, - - // resources required for Http3, Quic & network IO - conn_io: ConnectionIo, - - // connection digest - digest: Digest, - - // max concurrent streams this connection is allowed to create - max_streams: usize, - - // how many concurrent streams already active - current_streams: AtomicUsize, - - // lock is used during moving the connection across pools - release_lock: Arc>, - - // add session to active sessions in Http3Poll task - add_sessions: Arc)>>>, - // remove session from active sessions in Http3Poll task - drop_sessions: Arc>>, - // watch for idle pool timeouts - idle_close: watch::Receiver, - - h3poll_task: JoinHandle>, -} - impl Drop for ConnectionRefInner { fn drop(&mut self) { if !self.h3poll_task.is_finished() { @@ -145,7 +136,7 @@ impl UniqueID for ConnectionRef { } } -/// Http3 connector +/// HTTP 3 connector pub struct Connector { // for creating connections, the Stream for h3 should be reused transport: TransportConnector, @@ -153,7 +144,6 @@ pub struct Connector { idle_pool: Arc>, // the pool of h3 connections that have ongoing streams in_use_pool: 
InUsePool, - crypto: Option, } const DEFAULT_POOL_SIZE: usize = 128; @@ -169,7 +159,6 @@ impl Connector { transport: TransportConnector::new(options), idle_pool: Arc::new(ConnectionPool::new(pool_size)), in_use_pool: InUsePool::new(), - crypto: Crypto::new().ok(), } } @@ -368,16 +357,20 @@ async fn handshake(mut stream: Stream, max_streams: usize) -> Result panic!("expect h3"), - HttpSession::H3(_h3_session) => assert!(true), + HttpSession::H3(_h3_session) => { /* success */ } } } diff --git a/pingora-core/src/connectors/l4.rs b/pingora-core/src/connectors/l4.rs index d9cb17ab4..c781b767f 100644 --- a/pingora-core/src/connectors/l4.rs +++ b/pingora-core/src/connectors/l4.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +//! TCP and UDP/Quic connections + #[cfg(unix)] use crate::protocols::l4::ext::connect_uds; use crate::protocols::l4::ext::{ @@ -105,14 +107,11 @@ where let mut stream: Stream = if let Some(custom_l4) = peer.get_peer_options().and_then(|o| o.custom_l4.as_ref()) { custom_l4.connect(peer_addr).await? + } else if peer.udp_http3() { + inner_udp_connect(peer, &bind_to, peer_addr).await? } else { - if peer.udp_http3() { - // create UDP sockets - inner_udp_connect(peer, &bind_to, peer_addr).await? - } else { - // create TCP sockets - inner_tcp_connect(peer, bind_to, peer_addr).await? - } + // create TCP sockets + inner_tcp_connect(peer, bind_to, peer_addr).await? }; let tracer = peer.get_tracer(); @@ -140,6 +139,7 @@ where Ok(stream) } +/// create [`tokio::net::TcpSocket`] and a [`tokio::net::TcpStream`] async fn inner_tcp_connect

( peer: &P, bind_to: Option, @@ -222,6 +222,7 @@ where } } +/// create [`tokio::net::UdpSocket`] and a Quic [Connection](`crate::protocols::l4::quic::Connection::OutgoingHandshake`) async fn inner_udp_connect

( peer: &P, bind_to: &Option, @@ -255,7 +256,7 @@ where let socket = match conn_res { Ok(socket) => { debug!("connected to new server: {}", peer.address()); - Ok(socket.into()) + Ok(socket) } Err(e) => { let c = format!("Fail to connect to {peer}"); @@ -268,7 +269,7 @@ where let mut quic_http3_configs = None; if let Some(peer_options) = peer.get_peer_options() { - quic_http3_configs = peer_options.quic_http3_configs.clone() + quic_http3_configs.clone_from(&peer_options.quic_http3_configs) }; Ok(Connection::initiate(socket, quic_http3_configs)?.into()) diff --git a/pingora-core/src/protocols/http/mod.rs b/pingora-core/src/protocols/http/mod.rs index ab799a2a0..bf712b5c5 100644 --- a/pingora-core/src/protocols/http/mod.rs +++ b/pingora-core/src/protocols/http/mod.rs @@ -59,6 +59,7 @@ impl HttpTask { } } +/// HTTP Version #[derive(Debug, Default, Copy, Clone)] pub enum HttpVersion { #[default] diff --git a/pingora-core/src/protocols/http/server.rs b/pingora-core/src/protocols/http/server.rs index 59c353bad..313a062c8 100644 --- a/pingora-core/src/protocols/http/server.rs +++ b/pingora-core/src/protocols/http/server.rs @@ -16,7 +16,7 @@ use super::v1::server::HttpSession as SessionV1; use super::v2::server::HttpSession as SessionV2; -use super::v3::server::H3Session as SessionV3; +use super::v3::server::Http3Session as SessionV3; use super::HttpTask; use super::{error_resp, HttpVersion}; use crate::protocols::{Digest, SocketAddr, Stream}; diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs index 56d6ab62e..011dcc8ef 100644 --- a/pingora-core/src/protocols/http/v3/client.rs +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -1,3 +1,19 @@ +// Copyright 2024 Cloudflare, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! HTTP/3 client session and connection + use crate::connectors::http::v3::ConnectionRef; use crate::protocols::http::v3::nohash::StreamIdHashMap; use crate::protocols::http::v3::{ @@ -23,6 +39,10 @@ use std::time::Duration; use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::{mpsc, watch}; +/// HTTP/3 client session +/// +/// The [`Http3Session`] is built around [`pingora_http`] structs and converts to +/// [`quiche::h3::Event`] where needed. pub struct Http3Session { conn: ConnectionRef, @@ -209,7 +229,7 @@ impl Http3Session { return Ok(None); } - let read_timeout = self.read_timeout.clone(); + let read_timeout = self.read_timeout; tokio::select! { res = async { if !self.read_continue { @@ -337,7 +357,7 @@ impl Http3Session { /// For reused connection, the timing in the digest will reflect its initial handshakes /// The caller should check if the connection is reused to avoid misuse the timing field. pub fn digest(&self) -> Option<&Digest> { - Some(&self.conn.digest()) + Some(self.conn.digest()) } /// Return a mutable [`Digest`] reference for the connection diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index 547dc2c08..050c475f1 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -15,9 +15,9 @@ //! 
HTTP/3 implementation use crate::protocols::http::v3::nohash::StreamIdHashMap; -use crate::protocols::l4::quic::connector::OutgoingEstablishedState; -use crate::protocols::l4::quic::listener::IncomingEstablishedState; -use crate::protocols::l4::quic::{handle_connection_errors, MAX_IPV6_QUIC_DATAGRAM_SIZE}; +use crate::protocols::l4::quic::{ + connector, handle_connection_errors, listener, MAX_IPV6_QUIC_DATAGRAM_SIZE, +}; use bytes::{BufMut, Bytes, BytesMut}; use http::uri::{Authority, Scheme}; use http::{HeaderMap, HeaderName, HeaderValue, Request, Uri, Version}; @@ -38,31 +38,38 @@ use std::time::Duration; use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::Notify; -pub const H3_SESSION_EVENTS_CHANNEL_SIZE: usize = 256; -pub const H3_SESSION_DROP_DEQUE_INITIAL_CAPACITY: usize = 2048; +const H3_SESSION_EVENTS_CHANNEL_SIZE: usize = 256; +const H3_SESSION_DROP_DEQUE_INITIAL_CAPACITY: usize = 2048; const MAX_PER_INVOCATION_READ_BODY_BYTES: usize = MAX_IPV6_QUIC_DATAGRAM_SIZE * 64; pub mod client; -pub mod nohash; +pub(crate) mod nohash; pub mod server; +/// ConnectionIo useable for HTTP 3 interactions +/// unifies actions that are used in server & client #[derive(Clone)] pub(crate) struct ConnectionIo { + /// the QUIC/HTTP 3 connection id id: ConnectionId<'static>, + /// the underlying Quic connection quic: Arc>, + /// the actual HTTP 3 connection http3: Arc>, - // receive notification on Quic recv, used to check stream capacity - // as it only increases after MaxData or MaxStreamData frame was received + /// receive notification on Quic recv + /// + /// e.g. 
used to continue the Http3Poll loop and to check stream capacity + /// as it only increases after MaxData or MaxStreamData frame was received rx_notify: Arc, - // trigger Quic send, continue ConnectionTx write loop + /// trigger Quic send, continues [`crate::protocols::l4::quic::ConnectionTx`] write loop tx_notify: Arc, } -impl From<(&OutgoingEstablishedState, h3::Connection)> for ConnectionIo { - fn from((state, h3conn): (&OutgoingEstablishedState, h3::Connection)) -> Self { +impl From<(&connector::EstablishedState, h3::Connection)> for ConnectionIo { + fn from((state, h3conn): (&connector::EstablishedState, h3::Connection)) -> Self { Self { id: state.connection_id.clone(), quic: state.connection.clone(), @@ -73,8 +80,8 @@ impl From<(&OutgoingEstablishedState, h3::Connection)> for ConnectionIo { } } -impl From<(&IncomingEstablishedState, h3::Connection)> for ConnectionIo { - fn from((state, h3conn): (&IncomingEstablishedState, h3::Connection)) -> Self { +impl From<(&listener::EstablishedState, h3::Connection)> for ConnectionIo { + fn from((state, h3conn): (&listener::EstablishedState, h3::Connection)) -> Self { Self { id: state.connection_id.clone(), quic: state.connection.clone(), @@ -465,7 +472,7 @@ fn request_headers_to_event(req: &RequestHeader) -> Result> { Ok(qheaders) } -fn event_to_response_headers(resp: &Vec

) -> Result { +fn event_to_response_headers(resp: &[Header]) -> Result { // pseudo-headers have to be first, response only has a single valid pseudo header ":status" // which MUST be included as per RFC9114 Section 4.3.2 let mut response = ResponseHeader::build(resp[0].value(), Some(resp.len() - 1))?; @@ -494,7 +501,7 @@ fn headermap_to_headervec(headers: &HeaderMap) -> Vec
{ fn headervec_to_headermap(headers: &Vec
) -> Result { let mut map = HeaderMap::with_capacity(headers.len()); for h in headers { - if h.name().len() > 0 && h.name()[0] == b":".as_slice()[0] { + if !h.name().is_empty() && h.name()[0] == b":".as_slice()[0] { let k = HeaderName::from_bytes(h.name()).explain_err(InvalidHTTPHeader, |_| { format!("failed to parse header name {:?}", h.name()) })?; diff --git a/pingora-core/src/protocols/http/v3/nohash.rs b/pingora-core/src/protocols/http/v3/nohash.rs index 3488e9d40..18e7ca5c3 100644 --- a/pingora-core/src/protocols/http/v3/nohash.rs +++ b/pingora-core/src/protocols/http/v3/nohash.rs @@ -24,7 +24,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; /// A simple no-op hasher for Stream IDs. /// @@ -56,5 +56,4 @@ impl std::hash::Hasher for StreamIdHasher { type BuildStreamIdHasher = std::hash::BuildHasherDefault; -pub type StreamIdHashMap = HashMap; -pub type StreamIdHashSet = HashSet; +pub(crate) type StreamIdHashMap = HashMap; diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index 7915050e9..1cb6b1e1a 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -34,7 +34,7 @@ use parking_lot::Mutex; use pingora_error::ErrorType::{ConnectError, H3Error, InternalError, ReadError, WriteError}; use pingora_error::{Error, OrErr, Result}; use pingora_http::{RequestHeader, ResponseHeader}; -pub use quiche::h3::Config as H3Options; +pub use quiche::h3::Config as Http3Options; use quiche::h3::{self, Connection as QuicheH3Connection, Event, NameValue}; use quiche::ConnectionId; use std::collections::VecDeque; @@ -46,11 +46,14 @@ use tokio::sync::mpsc::{self, Receiver, Sender}; const BODY_BUF_LIMIT: usize = 1024 * 64; const SHUTDOWN_GOAWAY_DRAIN_TIMEOUT: Duration = Duration::from_secs(60); -/// Perform HTTP/3 
connection handshake with an established (QUIC) connection. +/// Perform HTTP/3 connection handshake with an established Quic connection. /// /// The optional `options` allow to adjust certain HTTP/3 parameters and settings. -/// See [`H3Options`] for more details. -pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result { +/// See [`Http3Options`] for more details. +pub(crate) async fn handshake( + mut io: Stream, + options: Option<&Http3Options>, +) -> Result { let Some(conn) = io.quic_connection_state() else { return Err(Error::explain( ConnectError, @@ -86,7 +89,7 @@ pub async fn handshake(mut io: Stream, options: Option<&H3Options>) -> Result

) -> Result

, } -impl Drop for H3Connection { +impl Drop for Http3Connection { fn drop(&mut self) { let mut drop_connections = self.drop_connections.lock(); drop_connections.push_back(self.conn_id().clone()); @@ -124,7 +127,7 @@ impl Drop for H3Connection { } } -impl H3Connection { +impl Http3Connection { fn conn_id(&self) -> &ConnectionId<'static> { &self.conn_io.id } @@ -176,10 +179,11 @@ impl H3Connection { } /// HTTP/3 server session -/// [`H3Session`]s contain the converted [`quiche::h3::Event::Headers`] as -/// [`pingora_http::RequestHeader`]. The [`H3Session`] is built around [`pingora_http`] structs and -/// converts to [`quiche::h3::Event`] where needed. -pub struct H3Session { +/// +/// [`Http3Session`]s contain the converted [`quiche::h3::Event::Headers`] as +/// [`pingora_http::RequestHeader`]. The [`Http3Session`] is built around [`pingora_http`] structs +/// and converts to [`quiche::h3::Event`] where needed. +pub struct Http3Session { pub(crate) stream_id: u64, conn_io: ConnectionIo, @@ -213,7 +217,7 @@ pub struct H3Session { digest: Arc, } -impl Drop for H3Session { +impl Drop for Http3Session { fn drop(&mut self) { let mut drop_sessions = self.drop_session.lock(); drop_sessions.push_back(self.stream_id); @@ -225,8 +229,8 @@ impl Drop for H3Session { } } -impl H3Session { - /// Create a new [`H3Session`] from the QUIC connection. +impl Http3Session { + /// Create a new [`Http3Session`] from the QUIC connection. /// This function returns a new HTTP/3 session when the provided HTTP/3 connection, `conn`, /// establishes a new HTTP/3 stream to this server. /// @@ -239,8 +243,8 @@ impl H3Session { /// /// `None` will be returned when the connection is closing so that the loop can exit. 
/// - pub async fn from_h3_conn( - conn: &mut H3Connection, + pub(crate) async fn from_h3_conn( + conn: &mut Http3Connection, digest: Arc, ) -> Result> { 'poll: loop { @@ -306,7 +310,7 @@ impl H3Session { let (event_tx, event_rx) = mpsc::channel(H3_SESSION_EVENTS_CHANNEL_SIZE); - let session = H3Session { + let session = Http3Session { stream_id, conn_io: conn.conn_io.clone(), diff --git a/pingora-core/src/protocols/l4/quic/connector.rs b/pingora-core/src/protocols/l4/quic/connector.rs index c545f901d..be63246c0 100644 --- a/pingora-core/src/protocols/l4/quic/connector.rs +++ b/pingora-core/src/protocols/l4/quic/connector.rs @@ -1,3 +1,19 @@ +// Copyright 2024 Cloudflare, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
Quic Connector + use crate::protocols::l4::quic::Connection; use crate::protocols::l4::quic::{ detect_gso_pacing, Crypto, QuicHttp3Configs, SocketDetails, MAX_IPV6_BUF_SIZE, @@ -13,7 +29,7 @@ use tokio::sync::Notify; use tokio::task::JoinHandle; /// corresponds to a new outgoing (connector) connection before the handshake is completed -pub struct OutgoingHandshakeState { +pub struct HandshakeState { //pub(crate) connection_id: ConnectionId<'static>, pub(crate) socket_details: SocketDetails, pub(crate) crypto: Crypto, @@ -21,7 +37,7 @@ pub struct OutgoingHandshakeState { } /// can be used to wait for network data or trigger network sending -pub struct OutgoingEstablishedState { +pub struct EstablishedState { pub(crate) connection_id: ConnectionId<'static>, pub(crate) connection: Arc>, @@ -52,7 +68,7 @@ impl Connection { let configs = configs.unwrap_or(QuicHttp3Configs::from_ca_file_path(None)?); let (gso_enabled, pacing_enabled) = detect_gso_pacing(&io); - Ok(Self::OutgoingHandshake(OutgoingHandshakeState { + Ok(Self::OutgoingHandshake(HandshakeState { crypto: Crypto::new()?, socket_details: SocketDetails { io: Arc::new(io), @@ -66,8 +82,8 @@ impl Connection { } } -/// connections receive task receives data from the UDP socket to the [`quiche::Connection`] -/// the task notifies the `rx_notify` when data was received from network for teh connection +/// connections receive task receives data from the UDP socket into the [`quiche::Connection`] +/// the task notifies the `rx_notify` when data was received from network for the connection pub struct ConnectionRx { pub(crate) socket_details: SocketDetails, @@ -79,6 +95,9 @@ pub struct ConnectionRx { } impl ConnectionRx { + /// start the `rx` task, consumes the struct + /// + /// is stopped within the `Drop` implementation of the corresponding [`Connection`] pub async fn start(self) -> Result<()> { let socket = self.socket_details.io; let local_addr = self.socket_details.local_addr; @@ -144,7 +163,7 @@ impl ConnectionRx { 
} } -impl Drop for OutgoingEstablishedState { +impl Drop for EstablishedState { fn drop(&mut self) { if !self.rx_handle.is_finished() { self.rx_handle.abort(); diff --git a/pingora-core/src/protocols/l4/quic/id_token.rs b/pingora-core/src/protocols/l4/quic/id_token.rs index 3b3067f4b..e1693a75c 100644 --- a/pingora-core/src/protocols/l4/quic/id_token.rs +++ b/pingora-core/src/protocols/l4/quic/id_token.rs @@ -24,6 +24,8 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//! generate connection ids and retry tokens + use log::trace; use quiche::{ConnectionId, Header}; use ring::hmac::Key; @@ -38,7 +40,7 @@ use std::net; /// /// Note that this function is only an example and doesn't do any cryptographic /// authenticate of the token. *It should not be used in production system*. -pub(crate) fn mint_token(hdr: &quiche::Header, src: &net::SocketAddr) -> Vec { +pub(crate) fn mint_token(hdr: &Header, src: &net::SocketAddr) -> Vec { // TODO: implement token generation/validation using crypto let mut token = Vec::new(); @@ -65,7 +67,7 @@ pub(crate) fn mint_token(hdr: &quiche::Header, src: &net::SocketAddr) -> Vec pub(crate) fn validate_token<'a>( src: &net::SocketAddr, token: &'a [u8], -) -> Option> { +) -> Option> { // TODO: implement token generation/validation using crypto if token.len() < 6 { return None; @@ -86,9 +88,10 @@ pub(crate) fn validate_token<'a>( return None; } - Some(quiche::ConnectionId::from_ref(&token[addr.len()..])) + Some(ConnectionId::from_ref(&token[addr.len()..])) } +/// Generate an incoming [`ConnectionId`]. pub(crate) fn generate_incoming_cid(key: &Key, hdr: &Header) -> ConnectionId<'static> { let conn_id = ring::hmac::sign(key, &hdr.dcid); let conn_id = conn_id.as_ref()[..quiche::MAX_CONN_ID_LEN].to_vec(); @@ -98,6 +101,7 @@ pub(crate) fn generate_incoming_cid(key: &Key, hdr: &Header) -> ConnectionId<'st conn_id } +/// Generate an outgoing [`ConnectionId`]. 
pub(crate) fn generate_outgoing_cid(rng: &SystemRandom) -> ConnectionId<'static> { let mut conn_id = [0; quiche::MAX_CONN_ID_LEN]; rng.fill(&mut conn_id[..]).unwrap(); diff --git a/pingora-core/src/protocols/l4/quic/listener.rs b/pingora-core/src/protocols/l4/quic/listener.rs index 30f77cbdd..d86b65061 100644 --- a/pingora-core/src/protocols/l4/quic/listener.rs +++ b/pingora-core/src/protocols/l4/quic/listener.rs @@ -1,8 +1,24 @@ +// Copyright 2024 Cloudflare, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Quic Listener + use crate::protocols::l4::quic::id_token::generate_incoming_cid; use crate::protocols::l4::quic::QuicHttp3Configs; use crate::protocols::l4::quic::{ detect_gso_pacing, Connection, Crypto, SocketDetails, CONNECTION_DROP_DEQUE_INITIAL_SIZE, - HANDSHAKE_PACKET_BUFFER_SIZE, MAX_IPV6_BUF_SIZE, + MAX_IPV6_BUF_SIZE, }; use crate::protocols::l4::stream::Stream; use log::{debug, trace, warn}; @@ -21,8 +37,11 @@ use tokio::sync::mpsc::{channel, Receiver, Sender}; use tokio::sync::Notify; use tokio::task::JoinHandle; +/// max. 
amount of [`UdpRecv`] messages on the `tokio::sync::mpsc::channel` +const HANDSHAKE_PACKET_BUFFER_SIZE: usize = 64; + /// corresponds to a new incoming (listener) connection before the handshake is completed -pub struct IncomingHandshakeState { +pub struct HandshakeState { pub(crate) connection_id: ConnectionId<'static>, pub(crate) configs: QuicHttp3Configs, pub(crate) drop_connection: Arc>>>, @@ -37,7 +56,7 @@ pub struct IncomingHandshakeState { } /// can be used to wait for network data or trigger network sending -pub struct IncomingEstablishedState { +pub struct EstablishedState { pub(crate) connection_id: ConnectionId<'static>, pub(crate) connection: Arc>, @@ -55,8 +74,8 @@ pub struct IncomingEstablishedState { pub(crate) drop_connection: Arc>>>, } -/// A [`IncomingConnectionHandle`] corresponds to a [`IncomingConnection`]. -/// For further details please refer to [`IncomingConnection`]. +/// A [`IncomingConnectionHandle`] corresponds to an Incoming [`Connection`]. +/// For further details please refer to [`Connection`]. pub enum IncomingConnectionHandle { /// new connection handle during handshake Handshake(HandshakeHandle), @@ -103,8 +122,9 @@ pub struct UdpRecv { } /// The [`Listener`] contains a [`HashMap`] linking [`quiche::ConnectionId`] to [`IncomingConnectionHandle`] -/// the `Listener::accept` method returns [`IncomingConnection`]s and is responsible to forward network -/// UDP packets to the according `Connection` through the corresponding [`IncomingConnectionHandle`]. +/// the `Listener::accept` method returns Incoming [`Connection`]s and is responsible to forward +/// network UDP packets to the according `Incoming [`Connection`] through the corresponding +/// [`IncomingConnectionHandle`]. /// /// In the [`IncomingConnectionHandle::Handshake`] state the UDP packets are forwarded through a /// [`tokio::sync::mpsc::channel`]. 
@@ -287,7 +307,7 @@ impl Listener { let response = Arc::new(Mutex::new(None)); debug!("new incoming connection {:?}", conn_id); - let connection = Connection::IncomingHandshake(IncomingHandshakeState { + let connection = Connection::IncomingHandshake(HandshakeState { connection_id: conn_id.clone(), drop_connection: self.drop_connections.clone(), diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index 0e0730fcd..2a3e20c49 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -1,3 +1,19 @@ +// Copyright 2024 Cloudflare, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
Quic integration + use log::{debug, error, trace}; use parking_lot::Mutex; use pingora_error::{Error, ErrorType, OrErr, Result}; @@ -31,9 +47,6 @@ use crate::protocols::l4::quic::sendto::{detect_gso, send_to, set_txtime_sockopt use crate::protocols::tls::{SslDigest, TlsRef}; use crate::protocols::{ConnectionState, Ssl}; -use crate::protocols::l4::quic::connector::{OutgoingEstablishedState, OutgoingHandshakeState}; -use crate::protocols::l4::quic::listener::{IncomingEstablishedState, IncomingHandshakeState}; - // UDP header 8 bytes, IPv4 Header 20 bytes //pub const MAX_IPV4_BUF_SIZE: usize = 65507; /// UDP header 8 bytes, IPv6 Header 40 bytes @@ -48,32 +61,32 @@ pub const MAX_IPV6_UDP_PACKET_SIZE: usize = 1452; // TODO: validate size (possibly 1200 is the standard) pub const MAX_IPV6_QUIC_DATAGRAM_SIZE: usize = 1350; -/// max. amount of [`UdpRecv`] messages on the `tokio::sync::mpsc::channel` -const HANDSHAKE_PACKET_BUFFER_SIZE: usize = 64; /// initial size for the connection drop deque const CONNECTION_DROP_DEQUE_INITIAL_SIZE: usize = 1024; /// Represents a Quic [`Connection`] in either `Incoming` or `Outgoing` direction. /// -/// A [`Connection`] of variant `Incoming*` corresponds to a [`IncomingConnectionHandle`]. -/// They are created having e.g. the variants [`Connection::IncomingHandshake`] / [`IncomingConnectionHandle::Handshake`] -/// and are transitioned to the [`Connection::IncomingEstablished`] / [`IncomingConnectionHandle::Established`] -/// variants once the TLS handshake was successful. +/// A [`Connection`] of variant `Incoming*` corresponds to a `IncomingConnectionHandle`. +/// +/// They are created having e.g. the variants [`Connection::IncomingHandshake`] / [`listener::IncomingConnectionHandle::Handshake`]. +/// Once the TLS handshake was successful they are transitioned to the +/// [`Connection::IncomingEstablished`] / [`listener::IncomingConnectionHandle::Established`] variants. 
/// -/// `Outgoing` connections do not have corresponding handles as they are bound to a distinguished -/// socket/quad-tuple and having a distinguished ConnectionRx task. +/// `Outgoing` connections **do not have** corresponding handles as they are bound to +/// a distinguished socket/4-tuple and having a distinguished [`connector::ConnectionRx`] task. pub enum Connection { /// new incoming connection while in the handshake phase - IncomingHandshake(IncomingHandshakeState), + IncomingHandshake(listener::HandshakeState), /// established incoming connection after successful handshake ([`quiche::Connection::is_established`]) - IncomingEstablished(IncomingEstablishedState), + IncomingEstablished(listener::EstablishedState), /// new outgoing connection while in the handshake phase - OutgoingHandshake(OutgoingHandshakeState), + OutgoingHandshake(connector::HandshakeState), /// established outgoing connection after successful handshake ([`quiche::Connection::is_established`]) - OutgoingEstablished(OutgoingEstablishedState), + OutgoingEstablished(connector::EstablishedState), } +/// the [`UdpSocket`] and according details #[derive(Clone)] pub(crate) struct SocketDetails { pub(crate) io: Arc, @@ -101,9 +114,9 @@ impl Crypto { } } -/// connections transmit task sends data from the [`quiche::Connection`] to the UDP socket -/// the actor is notified through the `tx_notify` and flushes all connection data to the network -pub struct ConnectionTx { +/// Connection transmit (`tx`) task sends data from the [`quiche::Connection`] to the UDP socket +/// the task is notified through the `tx_notify` and flushes all connection data to the network +pub(crate) struct ConnectionTx { pub(crate) socket_details: SocketDetails, pub(crate) connection_id: ConnectionId<'static>, @@ -114,10 +127,16 @@ pub struct ConnectionTx { } /// During establishing a [`ConnectionTx`] task is started being responsible to write data from -/// the [`quiche::Connection`] to the `[UdpSocket`]. 
-/// The connections `Rx` path is part of the [`Listener::accept`] which distributes the datagrams +/// the [`quiche::Connection`] to the [`UdpSocket`]. +/// +/// The connections receive (`rx`) path is part of the [`listener::Listener::accept`] which distributes the datagrams /// to the according connections. +/// +/// For outgoing [`Connection`]s a [`connector::ConnectionRx`] task is responsible to receive the data into the [`quiche::Connection`] impl ConnectionTx { + /// start the `tx` task, consumes the struct + /// + /// is stopped within the `Drop` implementation of the corresponding [`Connection`] pub(crate) async fn start(mut self) -> Result<()> { let id = self.connection_id; let mut out = [0u8; MAX_IPV6_BUF_SIZE]; @@ -230,7 +249,7 @@ impl ConnectionTx { } /// used within [`ConnectionTx`] to keep track of the maximum send burst -pub struct TxStats { +pub(crate) struct TxStats { loss_rate: f64, max_send_burst: usize, max_datagram_size: usize, @@ -449,8 +468,8 @@ impl Debug for QuicHttp3Configs { } fn detect_gso_pacing(io: &UdpSocket) -> (bool, bool) { - let gso_enabled = detect_gso(&io, MAX_IPV6_QUIC_DATAGRAM_SIZE); - let pacing_enabled = match set_txtime_sockopt(&io) { + let gso_enabled = detect_gso(io, MAX_IPV6_QUIC_DATAGRAM_SIZE); + let pacing_enabled = match set_txtime_sockopt(io) { Ok(_) => { debug!("successfully set SO_TXTIME socket option"); true @@ -464,7 +483,7 @@ fn detect_gso_pacing(io: &UdpSocket) -> (bool, bool) { } impl Connection { - pub(crate) fn establish_incoming(&mut self, state: IncomingEstablishedState) -> Result<()> { + pub(crate) fn establish_incoming(&mut self, state: listener::EstablishedState) -> Result<()> { if cfg!(test) { let conn = state.connection.lock(); debug_assert!( @@ -516,7 +535,7 @@ impl Connection { } } - pub(crate) fn establish_outgoing(&mut self, state: OutgoingEstablishedState) -> Result<()> { + pub(crate) fn establish_outgoing(&mut self, state: connector::EstablishedState) -> Result<()> { if cfg!(test) { let conn = 
state.connection.lock(); debug_assert!( @@ -565,15 +584,21 @@ impl ConnectionState for Connection { impl Drop for Connection { fn drop(&mut self) { - match self { - Connection::IncomingEstablished(s) => { - if !s.tx_handle.is_finished() { - s.tx_handle.abort(); - debug!("connection {:?} stopped tx task", s.connection_id); - } + if let Connection::IncomingEstablished(s) = self { + if !s.tx_handle.is_finished() { + s.tx_handle.abort(); + debug!("connection {:?} stopped tx task", s.connection_id); + } + } + if let Connection::OutgoingEstablished(s) = self { + if !s.rx_handle.is_finished() { + s.rx_handle.abort(); + debug!("connection {:?} stopped rx task", s.connection_id); + } + if !s.tx_handle.is_finished() { + s.tx_handle.abort(); + debug!("connection {:?} stopped tx task", s.connection_id); } - // FIXME: handle outgoing (stopping rx loop) - _ => {} } } } @@ -584,7 +609,7 @@ impl Ssl for Connection { None } - /// Return the [`tls::SslDigest`] for logging + /// Return the [`crate::protocols::tls::SslDigest`] for logging fn get_ssl_digest(&self) -> Option> { match self { Connection::IncomingEstablished(s) => { diff --git a/pingora-core/src/protocols/l4/quic/sendto.rs b/pingora-core/src/protocols/l4/quic/sendto.rs index 42f7a9e53..291858462 100644 --- a/pingora-core/src/protocols/l4/quic/sendto.rs +++ b/pingora-core/src/protocols/l4/quic/sendto.rs @@ -24,6 +24,9 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//! send UDP datagrams incl. GSO & pacing support + +use nix::sys::socket::{sendmsg, ControlMessage}; use std::cmp; use std::io; use std::os::unix::io::AsRawFd; @@ -43,7 +46,9 @@ pub fn detect_gso(_socket: &mio::net::UdpSocket, _segment_size: usize) -> bool { false } -/// Send packets using sendmsg() with GSO. +/// Send packets using [`sendmsg`] with +/// [Generic Segmentation Offloading](`ControlMessage::UdpGsoSegments`) and +/// [pacing](`ControlMessage::TxTime`). 
#[cfg(target_os = "linux")] fn send_to_gso_pacing( socket: &tokio::net::UdpSocket, @@ -51,8 +56,6 @@ fn send_to_gso_pacing( send_info: &quiche::SendInfo, segment_size: usize, ) -> io::Result { - use nix::sys::socket::sendmsg; - use nix::sys::socket::ControlMessage; use nix::sys::socket::MsgFlags; use nix::sys::socket::SockaddrStorage; use std::io::IoSlice; diff --git a/pingora-core/src/protocols/tls/quic/client.rs b/pingora-core/src/protocols/tls/quic/client.rs index 7aa8a7d4a..656022be0 100644 --- a/pingora-core/src/protocols/tls/quic/client.rs +++ b/pingora-core/src/protocols/tls/quic/client.rs @@ -1,7 +1,21 @@ +// Copyright 2024 Cloudflare, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Quic Client TLS Handshake + use crate::listeners::ALPN; -use crate::protocols::l4::quic::connector::{ - ConnectionRx, OutgoingEstablishedState, OutgoingHandshakeState, -}; +use crate::protocols::l4::quic::connector::{ConnectionRx, EstablishedState, HandshakeState}; use crate::protocols::l4::quic::id_token::generate_outgoing_cid; use crate::protocols::l4::quic::{handle_connection_errors, Connection, ConnectionTx, TxStats}; use crate::protocols::IO; @@ -48,7 +62,7 @@ where )); } Connection::OutgoingHandshake(o) => { - handshake_outgoing(o, peer, alpn_override, tls_ctx).await? + handshake_inner(o, peer, alpn_override, tls_ctx).await? } }; @@ -56,16 +70,16 @@ where Ok(stream) } -pub(crate) async fn handshake_outgoing

( - state: &mut OutgoingHandshakeState, +pub(crate) async fn handshake_inner

( + state: &mut HandshakeState, peer: &P, _alpn_override: Option, // potentially HTTP09 could be supported _tls_ctx: &SslConnector, // currently the SslConnector cannot be used with quiche, might be feasible -) -> pingora_error::Result +) -> pingora_error::Result where P: Peer + Send + Sync, { - let OutgoingHandshakeState { + let HandshakeState { crypto, socket_details, configs, @@ -151,7 +165,7 @@ where tx_notify.notify_waiters(); } - let e_state = OutgoingEstablishedState { + let e_state = EstablishedState { connection_id: conn_id.clone(), connection: connection.clone(), diff --git a/pingora-core/src/protocols/tls/quic/mod.rs b/pingora-core/src/protocols/tls/quic/mod.rs index c07f47e0f..7f39d4d98 100644 --- a/pingora-core/src/protocols/tls/quic/mod.rs +++ b/pingora-core/src/protocols/tls/quic/mod.rs @@ -1,2 +1,18 @@ +// Copyright 2024 Cloudflare, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Quic TLS handshakes + pub mod client; pub mod server; diff --git a/pingora-core/src/protocols/tls/quic/server.rs b/pingora-core/src/protocols/tls/quic/server.rs index 1c6db9862..c8345bd32 100644 --- a/pingora-core/src/protocols/tls/quic/server.rs +++ b/pingora-core/src/protocols/tls/quic/server.rs @@ -1,6 +1,22 @@ +// Copyright 2024 Cloudflare, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Quic Server TLS Handshake + use crate::protocols::l4::quic::id_token::{mint_token, validate_token}; use crate::protocols::l4::quic::listener::{ - EstablishedHandle, HandshakeResponse, IncomingEstablishedState, IncomingHandshakeState, + EstablishedHandle, EstablishedState, HandshakeResponse, HandshakeState, }; use crate::protocols::l4::quic::{ handle_connection_errors, Connection, ConnectionTx, TxStats, MAX_IPV6_QUIC_DATAGRAM_SIZE, @@ -34,7 +50,7 @@ pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result { - if let Some(e_state) = handshake_incoming(i).await? { + if let Some(e_state) = handshake_inner(i).await? 
{ // send HANDSHAKE_DONE Quic frame on established connection e_state.tx_notify.notify_waiters(); Some(e_state) @@ -66,10 +82,10 @@ pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result pingora_error::Result> { - let IncomingHandshakeState { +async fn handshake_inner( + state: &mut HandshakeState, +) -> pingora_error::Result> { + let HandshakeState { connection_id: conn_id, configs, drop_connection, @@ -275,7 +291,7 @@ async fn handshake_incoming( conn.local_error() ); - handle_connection_errors(&conn_id, conn.peer_error(), conn.local_error())?; + handle_connection_errors(conn_id, conn.peer_error(), conn.local_error())?; } let connection = Arc::new(Mutex::new(conn)); @@ -308,7 +324,7 @@ async fn handshake_incoming( tx_stats: TxStats::new(), }; - let e_state = IncomingEstablishedState { + let e_state = EstablishedState { connection_id: conn_id.clone(), connection: connection.clone(), diff --git a/pingora-core/src/upstreams/peer.rs b/pingora-core/src/upstreams/peer.rs index e2aef2fc6..7c26bf77b 100644 --- a/pingora-core/src/upstreams/peer.rs +++ b/pingora-core/src/upstreams/peer.rs @@ -205,6 +205,7 @@ pub trait Peer: Display + Clone { None } + /// Whether UDP/Quic should be used. fn udp_http3(&self) -> bool { let mut udp_http3 = false; if let Some(alpn) = self.get_alpn() { diff --git a/pingora-proxy/src/lib.rs b/pingora-proxy/src/lib.rs index 28608cdc6..89b57ec13 100644 --- a/pingora-proxy/src/lib.rs +++ b/pingora-proxy/src/lib.rs @@ -224,6 +224,9 @@ impl HttpProxy { (server_reused, error) } + ClientSession::H3(mut _h3) => { + todo!() + } }; ( server_reused, diff --git a/pingora/src/lib.rs b/pingora/src/lib.rs index ae2516e33..0b15ec00c 100644 --- a/pingora/src/lib.rs +++ b/pingora/src/lib.rs @@ -27,8 +27,8 @@ //! Pingora is a framework to build fast, reliable and programmable networked systems at Internet scale. //! //! # Features -//! - Http 1.x and Http 2 -//! - Modern TLS with OpenSSL or BoringSSL (FIPS compatible) +//! 
- Http 1.x, Http 2 and Http 3 +//! - Modern TLS with OpenSSL, BoringSSL (FIPS compatible) and Rustls //! - Zero downtime upgrade //! //! # Usage From 531abdb52fa60444ccaf7aa8e865c2557e29b25d Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Sat, 25 Jan 2025 22:15:39 +0100 Subject: [PATCH 39/52] add read timeout to response headers & trailers build default quiche/http3 config within tls::quic::Connector including the ServerConf parameters allow using rustls & quic-boringssl and overriding quiche/http3 config from peer options correctly route h3 in Connector::new_stream --- pingora-core/Cargo.toml | 2 +- pingora-core/src/connectors/http/mod.rs | 23 +-- pingora-core/src/connectors/http/v2.rs | 59 +------- pingora-core/src/connectors/http/v3.rs | 12 +- pingora-core/src/connectors/l4.rs | 10 +- pingora-core/src/connectors/mod.rs | 46 +++--- .../connectors/tls/boringssl_openssl/mod.rs | 2 - pingora-core/src/connectors/tls/mod.rs | 2 + pingora-core/src/connectors/tls/quic/mod.rs | 142 ++++++++++++++++++ pingora-core/src/protocols/http/client.rs | 6 +- pingora-core/src/protocols/http/server.rs | 2 +- pingora-core/src/protocols/http/v3/client.rs | 61 +++++--- .../src/protocols/l4/quic/connector.rs | 15 +- pingora-core/src/protocols/l4/quic/mod.rs | 91 ++++------- pingora-core/src/protocols/tls/quic/client.rs | 22 ++- pingora-core/src/protocols/tls/quic/mod.rs | 1 + pingora-core/src/protocols/tls/quic/stream.rs | 78 ++++++++++ 17 files changed, 374 insertions(+), 200 deletions(-) create mode 100644 pingora-core/src/connectors/tls/quic/mod.rs create mode 100644 pingora-core/src/protocols/tls/quic/stream.rs diff --git a/pingora-core/Cargo.toml b/pingora-core/Cargo.toml index 082633106..ace853cbb 100644 --- a/pingora-core/Cargo.toml +++ b/pingora-core/Cargo.toml @@ -98,4 +98,4 @@ patched_http1 = ["pingora-http/patched_http1"] openssl_derived = ["any_tls"] any_tls = [] sentry = ["dep:sentry"] -quic-boringssl = ["dep:quiche", "dep:ring", "quiche/boringssl-boring-crate"] 
+quic-boringssl = ["dep:quiche", "dep:ring", "pingora-boringssl", "quiche/boringssl-boring-crate"] diff --git a/pingora-core/src/connectors/http/mod.rs b/pingora-core/src/connectors/http/mod.rs index d9d746e17..c011db3b1 100644 --- a/pingora-core/src/connectors/http/mod.rs +++ b/pingora-core/src/connectors/http/mod.rs @@ -57,7 +57,15 @@ impl Connector { let h1_only = peer .get_peer_options() .map_or(true, |o| o.alpn.get_max_http_version() == 1); - if h1_only { + + if peer.udp_http3() { + if let Some(h3) = self.h3.reused_http_session(peer).await? { + Ok((HttpSession::H3(h3), true)) + } else { + let session = self.h3.new_http_session(peer).await?; + Ok((session, false)) + } + } else if h1_only { let (h1, reused) = self.h1.get_http_session(peer).await?; Ok((HttpSession::H1(h1), reused)) } else { @@ -81,12 +89,6 @@ impl Connector { let session = self.h2.new_http_session(peer).await?; Ok((session, false)) } - /* - // FIXME: correctly route HTTP3 - let Some(h3) = self.h3.reused_http_session(peer).await?; { - Ok((HttpSession::H3(h3), true)) - } - */ } pub async fn release_http_session( @@ -108,7 +110,6 @@ impl Connector { } } -// TODO: also use in v2, currently only used in v3 pub(crate) struct InUsePool { // TODO: use pingora hashmap to shard the lock contention pools: RwLock>>, @@ -135,15 +136,15 @@ impl InUsePool { pools.insert(reuse_hash, pool); } - // retrieve a h2 conn ref to create a new stream - // the caller should return the conn ref to this pool if there are still + // retrieve a `` to create a new stream + // the caller should return the to this pool if there is still // capacity left for more streams pub(crate) fn get(&self, reuse_hash: u64) -> Option { let pools = self.pools.read(); pools.get(&reuse_hash)?.get_any().map(|v| v.1) } - // release a h2_stream, this functional will cause an ConnectionRef to be returned (if exist) + // release a http stream, this functional will cause an `` to be returned (if exist) // the caller should update the ref and then 
decide where to put it (in use pool or idle) pub(crate) fn release(&self, reuse_hash: u64, id: UniqueIDType) -> Option { let pools = self.pools.read(); diff --git a/pingora-core/src/connectors/http/v2.rs b/pingora-core/src/connectors/http/v2.rs index bbec0cb44..a1718ac3d 100644 --- a/pingora-core/src/connectors/http/v2.rs +++ b/pingora-core/src/connectors/http/v2.rs @@ -14,20 +14,19 @@ //! Connecting to HTTP 2 servers -use super::HttpSession; +use super::{HttpSession, InUsePool}; use crate::connectors::{ConnectorOptions, TransportConnector}; use crate::protocols::http::v1::client::HttpSession as Http1Session; use crate::protocols::http::v2::client::{drive_connection, Http2Session}; -use crate::protocols::{Digest, Stream, UniqueIDType}; +use crate::protocols::{Digest, Stream, UniqueID, UniqueIDType}; use crate::upstreams::peer::{Peer, ALPN}; use bytes::Bytes; use h2::client::SendRequest; use log::debug; -use parking_lot::{Mutex, RwLock}; +use parking_lot::Mutex; use pingora_error::{Error, ErrorType::*, OrErr, Result}; -use pingora_pool::{ConnectionMeta, ConnectionPool, PoolNode}; -use std::collections::HashMap; +use pingora_pool::{ConnectionMeta, ConnectionPool}; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; use std::time::Duration; @@ -164,51 +163,9 @@ impl ConnectionRef { } } -// FIXME: potentially lift to mod.rs -pub(crate) struct InUsePool { - // TODO: use pingora hashmap to shard the lock contention - pools: RwLock>>, -} - -impl InUsePool { - pub(crate) fn new() -> Self { - InUsePool { - pools: RwLock::new(HashMap::new()), - } - } - - pub(crate) fn insert(&self, reuse_hash: u64, conn: ConnectionRef) { - { - let pools = self.pools.read(); - if let Some(pool) = pools.get(&reuse_hash) { - pool.insert(conn.id(), conn); - return; - } - } // drop read lock - - let pool = PoolNode::new(); - pool.insert(conn.id(), conn); - let mut pools = self.pools.write(); - pools.insert(reuse_hash, pool); - } - - // retrieve a h2 conn ref to create 
a new stream - // the caller should return the conn ref to this pool if there are still - // capacity left for more streams - pub(crate) fn get(&self, reuse_hash: u64) -> Option { - let pools = self.pools.read(); - pools.get(&reuse_hash)?.get_any().map(|v| v.1) - } - - // release a h2_stream, this functional will cause an ConnectionRef to be returned (if exist) - // the caller should update the ref and then decide where to put it (in use pool or idle) - pub(crate) fn release(&self, reuse_hash: u64, id: UniqueIDType) -> Option { - let pools = self.pools.read(); - if let Some(pool) = pools.get(&reuse_hash) { - pool.remove(id) - } else { - None - } +impl UniqueID for ConnectionRef { + fn id(&self) -> UniqueIDType { + self.0.id } } @@ -221,7 +178,7 @@ pub struct Connector { // the h2 connection idle pool idle_pool: Arc>, // the pool of h2 connections that have ongoing streams - in_use_pool: InUsePool, + in_use_pool: InUsePool, } impl Connector { diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index 2216a09d8..eda19a8bf 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -36,7 +36,6 @@ use std::sync::Arc; use std::time::Duration; use tokio::sync::{mpsc, watch}; use tokio::task::JoinHandle; -// FIXME: ConnectorOptions contains CA file path from ServerConfig /// a ref to an established HTTP 3 connection #[derive(Clone)] @@ -192,7 +191,7 @@ impl Connector { // lock the connection before adding a stream // ensures that moving between pools and e.g. idle() checks is guarded let _release_lock = conn.0.release_lock.lock_arc(); - let h3_stream = conn.spawn_stream().await?; + let h3_stream = conn.spawn_stream(); if conn.more_streams_allowed() { self.in_use_pool.insert(reuse_hash, conn); } @@ -274,7 +273,6 @@ impl Connector { let h3_stream = conn .spawn_stream() - .await? 
.expect("newly created connections should have at least one free stream"); if conn.more_streams_allowed() { @@ -287,18 +285,18 @@ impl Connector { impl ConnectionRef { // spawn a stream if more stream is allowed, otherwise return Ok(None) - pub async fn spawn_stream(&self) -> Result> { + fn spawn_stream(&self) -> Option { // Atomically check if the current_stream is over the limit // load(), compare and then fetch_add() cannot guarantee the same let current_streams = self.0.current_streams.fetch_add(1, Ordering::SeqCst); if current_streams >= self.0.max_streams { // already over the limit, reset the counter to the previous value self.0.current_streams.fetch_sub(1, Ordering::SeqCst); - return Ok(None); + return None; } - let h3_session = Http3Session::new(self.clone())?; - Ok(Some(h3_session)) + let h3_session = Http3Session::new(self.clone()); + Some(h3_session) } pub fn more_streams_allowed(&self) -> bool { diff --git a/pingora-core/src/connectors/l4.rs b/pingora-core/src/connectors/l4.rs index c781b767f..0b5cf9f45 100644 --- a/pingora-core/src/connectors/l4.rs +++ b/pingora-core/src/connectors/l4.rs @@ -267,12 +267,7 @@ where } }?; - let mut quic_http3_configs = None; - if let Some(peer_options) = peer.get_peer_options() { - quic_http3_configs.clone_from(&peer_options.quic_http3_configs) - }; - - Ok(Connection::initiate(socket, quic_http3_configs)?.into()) + Ok(Connection::initiate(socket)?.into()) } SocketAddr::Unix(_addr) => { // NOTE: tokio::net::UnixDatagram support could be an option @@ -686,7 +681,8 @@ mod quic_tests { assert!(pre_handshake_stream.quic_connection_state().is_some()); let tls_connector = tls::Connector::new(None); - let mut stream = do_connect(&peer, None, None, &tls_connector.ctx).await?; + let quic_connector = tls::quic::Connector::new(None); + let mut stream = do_connect(&peer, None, None, &tls_connector, &quic_connector).await?; assert!(stream.quic_connection_state().is_some()); let connection = stream.quic_connection_state().unwrap(); 
diff --git a/pingora-core/src/connectors/mod.rs b/pingora-core/src/connectors/mod.rs index 669130001..364ddbd81 100644 --- a/pingora-core/src/connectors/mod.rs +++ b/pingora-core/src/connectors/mod.rs @@ -39,7 +39,6 @@ use pingora_pool::{ConnectionMeta, ConnectionPool}; use std::collections::HashMap; use std::net::SocketAddr; use std::sync::Arc; -use tls::TlsConnector; use tokio::sync::Mutex; /// The options to configure a [TransportConnector] @@ -131,6 +130,7 @@ impl ConnectorOptions { /// [TransportConnector] provides APIs to connect to servers via TCP or TLS with connection reuse pub struct TransportConnector { tls_ctx: tls::Connector, + quic_tls_ctx: tls::quic::Connector, connection_pool: Arc>>>, offload: Option, bind_to_v4: Vec, @@ -156,7 +156,8 @@ impl TransportConnector { .as_ref() .map_or_else(Vec::new, |o| o.bind_to_v6.clone()); TransportConnector { - tls_ctx: tls::Connector::new(options), + tls_ctx: tls::Connector::new(options.clone()), + quic_tls_ctx: tls::quic::Connector::new(options), connection_pool: Arc::new(ConnectionPool::new(pool_size)), offload: offload.map(|v| OffloadRuntime::new(v.0, v.1)), bind_to_v4, @@ -178,11 +179,21 @@ impl TransportConnector { let stream = if let Some(rt) = rt { let peer = peer.clone(); let tls_ctx = self.tls_ctx.clone(); - rt.spawn(async move { do_connect(&peer, bind_to, alpn_override, &tls_ctx.ctx).await }) - .await - .or_err(InternalError, "offload runtime failure")?? + let quic_tls_ctx = self.quic_tls_ctx.clone(); + rt.spawn(async move { + do_connect(&peer, bind_to, alpn_override, &tls_ctx, &quic_tls_ctx).await + }) + .await + .or_err(InternalError, "offload runtime failure")?? } else { - do_connect(peer, bind_to, alpn_override, &self.tls_ctx.ctx).await? + do_connect( + peer, + bind_to, + alpn_override, + &self.tls_ctx, + &self.quic_tls_ctx, + ) + .await? 
}; Ok(stream) @@ -299,11 +310,12 @@ async fn do_connect( peer: &P, bind_to: Option, alpn_override: Option, - tls_ctx: &TlsConnector, + tls_ctx: &tls::Connector, + quic_tls_ctx: &tls::quic::Connector, ) -> Result { // Create the future that does the connections, but don't evaluate it until // we decide if we need a timeout or not - let connect_future = do_connect_inner(peer, bind_to, alpn_override, tls_ctx); + let connect_future = do_connect_inner(peer, bind_to, alpn_override, tls_ctx, quic_tls_ctx); match peer.total_connection_timeout() { Some(t) => match pingora_timeout::timeout(t, connect_future).await { @@ -322,7 +334,8 @@ async fn do_connect_inner( peer: &P, bind_to: Option, alpn_override: Option, - tls_ctx: &TlsConnector, + tls_ctx: &tls::Connector, + quic_tls_ctx: &tls::quic::Connector, ) -> Result { let stream = l4_connect(peer, bind_to).await?; if peer.tls() { @@ -333,14 +346,12 @@ async fn do_connect_inner( "usage of HTTP3 requires enabled TLS for the peer", )); } - // TODO: use tls_ctx with boringssl & quiche - // tls_ctx is already built, but quiche only provides a Config::from_boring() - // accepting a SslContextBuilder, but internally calling only .build() on it, - // likely a SslContext it should be possible to adapt quiche to accept a SslConnector - let quic_stream = quic_handshake(stream, peer, alpn_override, tls_ctx).await?; + + let quic_stream = + quic_handshake(stream, peer, alpn_override, &quic_tls_ctx.quic_http3).await?; Ok(Box::new(quic_stream)) } else { - let tls_stream = tls::connect(stream, peer, alpn_override, tls_ctx).await?; + let tls_stream = tls::connect(stream, peer, alpn_override, &tls_ctx.ctx).await?; Ok(Box::new(tls_stream)) } } else { @@ -373,7 +384,6 @@ impl PreferredHttpVersion { let v = self.versions.read(); v.get(&key) .copied() - // FIXME: H3 support .map(|v| if v == 1 { ALPN::H1 } else { ALPN::H2H1 }) } } @@ -411,6 +421,7 @@ mod tests { use tls::Connector; use super::*; + use crate::connectors::tls::quic; use 
crate::upstreams::peer::BasicPeer; use tokio::io::AsyncWriteExt; #[cfg(unix)] @@ -521,7 +532,8 @@ mod tests { /// the decomposed error type and message async fn get_do_connect_failure_with_peer(peer: &BasicPeer) -> (ErrorType, String) { let tls_connector = Connector::new(None); - let stream = do_connect(peer, None, None, &tls_connector.ctx).await; + let quic_connector = quic::Connector::new(None); + let stream = do_connect(peer, None, None, &tls_connector, &quic_connector).await; match stream { Ok(_) => panic!("should throw an error"), Err(e) => ( diff --git a/pingora-core/src/connectors/tls/boringssl_openssl/mod.rs b/pingora-core/src/connectors/tls/boringssl_openssl/mod.rs index a57668fb6..9983ac1a3 100644 --- a/pingora-core/src/connectors/tls/boringssl_openssl/mod.rs +++ b/pingora-core/src/connectors/tls/boringssl_openssl/mod.rs @@ -32,8 +32,6 @@ use crate::tls::ssl::{SslConnector, SslFiletype, SslMethod, SslVerifyMode, SslVe use crate::tls::x509::store::X509StoreBuilder; use crate::upstreams::peer::{Peer, ALPN}; -pub type TlsConnector = SslConnector; - const CIPHER_LIST: &str = "AES-128-GCM-SHA256\ :AES-256-GCM-SHA384\ :CHACHA20-POLY1305-SHA256\ diff --git a/pingora-core/src/connectors/tls/mod.rs b/pingora-core/src/connectors/tls/mod.rs index 25a7b48a3..da6cee9e6 100644 --- a/pingora-core/src/connectors/tls/mod.rs +++ b/pingora-core/src/connectors/tls/mod.rs @@ -18,6 +18,8 @@ mod boringssl_openssl; #[cfg(feature = "openssl_derived")] pub use boringssl_openssl::*; +pub(crate) mod quic; + #[cfg(feature = "rustls")] mod rustls; diff --git a/pingora-core/src/connectors/tls/quic/mod.rs b/pingora-core/src/connectors/tls/quic/mod.rs new file mode 100644 index 000000000..1cc24463a --- /dev/null +++ b/pingora-core/src/connectors/tls/quic/mod.rs @@ -0,0 +1,142 @@ +// Copyright 2024 Cloudflare, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Quic TLS Connector + +use crate::connectors::ConnectorOptions; +use crate::protocols::l4::quic::QuicHttp3Configs; +use pingora_boringssl::ssl::{SslContextBuilder, SslCurve, SslFiletype, SslMethod, SslVersion}; +use std::ffi::OsStr; +use std::sync::Once; + +const CIPHER_LIST: &str = "AES-128-GCM-SHA256\ + :AES-256-GCM-SHA384\ + :CHACHA20-POLY1305-SHA256\ + :ECDHE-ECDSA-AES128-GCM-SHA256\ + :ECDHE-ECDSA-AES256-GCM-SHA384\ + :ECDHE-RSA-AES128-GCM-SHA256\ + :ECDHE-RSA-AES256-GCM-SHA384\ + :ECDHE-RSA-AES128-SHA\ + :ECDHE-RSA-AES256-SHA384\ + :AES128-GCM-SHA256\ + :AES256-GCM-SHA384\ + :AES128-SHA\ + :AES256-SHA\ + :DES-CBC3-SHA"; + +/** + * Enabled signature algorithms for signing/verification (ECDSA). + * As of 4/10/2023, the only addition to boringssl's defaults is ECDSA_SECP521R1_SHA512. + */ +const SIGALG_LIST: &str = "ECDSA_SECP256R1_SHA256\ + :RSA_PSS_RSAE_SHA256\ + :RSA_PKCS1_SHA256\ + :ECDSA_SECP384R1_SHA384\ + :RSA_PSS_RSAE_SHA384\ + :RSA_PKCS1_SHA384\ + :RSA_PSS_RSAE_SHA512\ + :RSA_PKCS1_SHA512\ + :RSA_PKCS1_SHA1\ + :ECDSA_SECP521R1_SHA512"; +/** + * Enabled curves for ECDHE (signature key exchange). + * As of 4/10/2023, the only addition to boringssl's defaults is SECP521R1. + * + * N.B. The ordering of these curves is important. The boringssl library will select the first one + * as a guess when negotiating a handshake with a server using TLSv1.3. We should opt for curves + * that are both computationally cheaper and more supported. 
+ */ +const BORINGSSL_CURVE_LIST: &[SslCurve] = &[ + SslCurve::X25519, + SslCurve::SECP256R1, + SslCurve::SECP384R1, + SslCurve::SECP521R1, +]; + +static INIT_CA_ENV: Once = Once::new(); +fn init_ssl_cert_env_vars() { + // this sets env vars to pick up the root certs + // it is universal across openssl and boringssl + INIT_CA_ENV.call_once(openssl_probe::init_ssl_cert_env_vars); +} + +/// enables rustls & quic-boringssl usage +// at the cost of some duplication +#[derive(Clone)] +pub struct Connector { + pub(crate) quic_http3: QuicHttp3Configs, +} + +impl Connector { + pub fn new(options: Option) -> Self { + let mut quic_builder = SslContextBuilder::new(SslMethod::tls()).unwrap(); + // TODO: make these conf + // Set supported ciphers. + quic_builder.set_cipher_list(CIPHER_LIST).unwrap(); + // Set supported signature algorithms and ECDH (key exchange) curves. + + quic_builder + .set_sigalgs_list(&SIGALG_LIST.to_lowercase()) + .unwrap(); + quic_builder.set_curves(BORINGSSL_CURVE_LIST).unwrap(); + + quic_builder + .set_max_proto_version(Some(SslVersion::TLS1_3)) + .unwrap(); + quic_builder + .set_min_proto_version(Some(SslVersion::TLS1_3)) // HTTP3 requires TLS 1.3 + .unwrap(); + + if let Some(conf) = options.as_ref() { + if let Some(ca_file_path) = conf.ca_file.as_ref() { + quic_builder.set_ca_file(ca_file_path).unwrap(); + } else { + init_ssl_cert_env_vars(); + // load from default system wide trust location. 
(the name is misleading) + quic_builder.set_default_verify_paths().unwrap(); + } + if let Some((cert, key)) = conf.cert_key_file.as_ref() { + quic_builder + .set_certificate_file(cert, SslFiletype::PEM) + .unwrap(); + quic_builder + .set_private_key_file(key, SslFiletype::PEM) + .unwrap(); + } + if conf.debug_ssl_keylog { + // write TLS keys to file specified by SSLKEYLOGFILE if it exists + if let Some(quic_keylog) = std::env::var_os("SSLKEYLOGFILE").and_then(|mut path| { + path.push(OsStr::new(".quic")); // suffix to avoid collisions with TCP/TLS keylog + std::fs::OpenOptions::new() + .append(true) + .create(true) + .open(path) + .ok() + }) { + use std::io::Write; + quic_builder.set_keylog_callback(move |_, line| { + let _ = writeln!(&quic_keylog, "{}", line); + }); + } + } + } else { + init_ssl_cert_env_vars(); + quic_builder.set_default_verify_paths().unwrap(); + } + + Connector { + quic_http3: QuicHttp3Configs::with_boring_ssl_ctx_builder(quic_builder).unwrap(), + } + } +} diff --git a/pingora-core/src/protocols/http/client.rs b/pingora-core/src/protocols/http/client.rs index 657694ad0..c4ede7603 100644 --- a/pingora-core/src/protocols/http/client.rs +++ b/pingora-core/src/protocols/http/client.rs @@ -151,8 +151,7 @@ impl HttpSession { /// Give up the http session abruptly. /// For H1 this will close the underlying connection /// For H2 this will send RST_STREAM frame to end this stream if the stream has not ended at all - /// TODO: fix h3 documentation - /// For H3 this will + /// For H3 this will send a `STOP_SENDING` and a `RESET_STREAM` for the Quic stream to the client. pub async fn shutdown(&mut self) { match self { Self::H1(s) => s.shutdown().await, @@ -186,8 +185,7 @@ impl HttpSession { /// Return a mutable [Digest] reference for the connection. /// - /// Will return `None` if this is an H2 session and multiple streams are open. - /// TODO: fix h3 documentation + /// Will return `None` if this is an H2 or H3 session and multiple streams are open. 
pub fn digest_mut(&mut self) -> Option<&mut Digest> { match self { Self::H1(s) => Some(s.digest_mut()), diff --git a/pingora-core/src/protocols/http/server.rs b/pingora-core/src/protocols/http/server.rs index 313a062c8..45401af3a 100644 --- a/pingora-core/src/protocols/http/server.rs +++ b/pingora-core/src/protocols/http/server.rs @@ -490,7 +490,7 @@ impl Session { /// Return a mutable [Digest] reference for the connection. /// - /// Will return `None` if multiple H2 streams are open. + /// Will return `None` if this is an H2 or H3 session and multiple streams are open. pub fn digest_mut(&mut self) -> Option<&mut Digest> { match self { Self::H1(s) => Some(s.digest_mut()), diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs index 011dcc8ef..fdd05b9ff 100644 --- a/pingora-core/src/protocols/http/v3/client.rs +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -52,10 +52,9 @@ pub struct Http3Session { // HTTP3 event channel for this stream_id event_rx: Option>, - /// The read timeout, which will be applied to both reading the header and the body. - /// The timeout is reset on every read. This is not a timeout on the overall duration of the - /// response. - // FIXME: race with timeout if present + /// The read timeout, which will be applied when reading the header, body and trailers. + /// The timeout is reset on every read attempt. This is not a timeout on the overall duration + /// of the response. 
pub read_timeout: Option, // sent request @@ -98,8 +97,8 @@ impl Drop for Http3Session { } impl Http3Session { - pub(crate) fn new(conn: ConnectionRef) -> Result { - Ok(Self { + pub(crate) fn new(conn: ConnectionRef) -> Self { + Self { conn, stream_id: None, event_rx: None, @@ -112,7 +111,7 @@ impl Http3Session { body_read: 0, read_continue: false, read_ended: false, - }) + } } /// Write the request header to the server @@ -200,10 +199,18 @@ impl Http3Session { return Ok(()); }; - let (headers, _) = headers_event(self.stream_id()?, self.event_rx()?).await?; - let map = event_to_response_headers(&headers)?; + let read_timeout = self.read_timeout; + tokio::select! { + res = headers_event(self.stream_id()?, self.event_rx()?) => { + let (headers, _) = res?; + let map = event_to_response_headers(&headers)?; + self.response_header = Some(map); + }, + _timedout = timeout(read_timeout) => { + return Err(Error::explain(ErrorType::ReadTimedout, "reading response headers timed out")) + } + } - self.response_header = Some(map); Ok(()) } @@ -245,14 +252,8 @@ impl Http3Session { return Ok(None) } }, - _timedout = async { - if let Some(read_timeout) = read_timeout { - tokio::time::sleep(read_timeout).await; - } else { - tokio::time::sleep(Duration::MAX).await; - } - } => { - return Err(Error::explain(ErrorType::ReadTimedout, "reading body timed out")) + _timedout = timeout(read_timeout) => { + return Err(Error::explain(ErrorType::ReadTimedout, "reading response body timed out")) } } @@ -291,8 +292,8 @@ impl Http3Session { // RFC9110 Section 6.5.1 // The presence of the keyword "trailers" in the TE header field (Section 10.1.4) of - // a request indicates that the client is willing to accept trailer fields, - // on behalf of itself and any downstream clients. + // a request indicates that the client is willing to accept trailer fields, on behalf of + // itself and any downstream clients. 
let mut client_accepts = false; if let Some(headers) = &self.request_header_written { if let Some(te_header) = headers.headers.get(http::header::TE) { @@ -316,8 +317,16 @@ impl Http3Session { // as per RFC9114/Section 4.1 it is an optional SINGLE header frame // only possible when supported by the version of HTTP in use and enabled by an explicit // framing mechanism - let (trailers, _) = headers_event(self.stream_id()?, self.event_rx()?).await?; - let trailer_map = headervec_to_headermap(&trailers)?; + let read_timeout = self.read_timeout; + let trailer_map = tokio::select! { + res = headers_event(self.stream_id()?, self.event_rx()?) => { + let (trailers, _) = res?; + headervec_to_headermap(&trailers)? + }, + _timedout = timeout(read_timeout) => { + return Err(Error::explain(ErrorType::ReadTimedout, "reading response body timed out")) + } + }; Ok(Some(trailer_map)) } @@ -542,3 +551,11 @@ fn housekeeping_add_sessions( } } } + +async fn timeout(timeout: Option) { + if let Some(timeout) = timeout { + tokio::time::sleep(timeout).await; + } else { + tokio::time::sleep(Duration::MAX).await; + } +} diff --git a/pingora-core/src/protocols/l4/quic/connector.rs b/pingora-core/src/protocols/l4/quic/connector.rs index be63246c0..149b8c406 100644 --- a/pingora-core/src/protocols/l4/quic/connector.rs +++ b/pingora-core/src/protocols/l4/quic/connector.rs @@ -15,9 +15,7 @@ //! 
Quic Connector use crate::protocols::l4::quic::Connection; -use crate::protocols::l4::quic::{ - detect_gso_pacing, Crypto, QuicHttp3Configs, SocketDetails, MAX_IPV6_BUF_SIZE, -}; +use crate::protocols::l4::quic::{detect_gso_pacing, Crypto, SocketDetails, MAX_IPV6_BUF_SIZE}; use log::{debug, error, trace}; use parking_lot::Mutex; use pingora_error::{ErrorType, OrErr, Result}; @@ -33,7 +31,6 @@ pub struct HandshakeState { //pub(crate) connection_id: ConnectionId<'static>, pub(crate) socket_details: SocketDetails, pub(crate) crypto: Crypto, - pub(crate) configs: QuicHttp3Configs, } /// can be used to wait for network data or trigger network sending @@ -57,7 +54,7 @@ pub struct EstablishedState { } impl Connection { - pub fn initiate(io: UdpSocket, configs: Option) -> Result { + pub fn initiate(io: UdpSocket) -> Result { let local_addr = io.local_addr().explain_err(ErrorType::SocketError, |e| { format!("failed to get local address from socket: {}", e) })?; @@ -65,11 +62,9 @@ impl Connection { format!("failed to get peer address from socket: {}", e) })?; - let configs = configs.unwrap_or(QuicHttp3Configs::from_ca_file_path(None)?); - let (gso_enabled, pacing_enabled) = detect_gso_pacing(&io); Ok(Self::OutgoingHandshake(HandshakeState { - crypto: Crypto::new()?, + crypto: Crypto::new()?, // TODO:: custom crypto or cid generation/validation socket_details: SocketDetails { io: Arc::new(io), local_addr, @@ -77,7 +72,6 @@ impl Connection { gso_enabled, pacing_enabled, }, - configs, })) } } @@ -103,8 +97,7 @@ impl ConnectionRx { let local_addr = self.socket_details.local_addr; let conn_id = self.connection_id; - // TODO: support ip switching on local & peer address - // would require socket re-binding + // support ip switching on local & peer address would require socket re-binding let mut buf = [0u8; MAX_IPV6_BUF_SIZE]; debug!("connection {:?} rx read", conn_id); 'read: loop { diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs 
index 2a3e20c49..98013247a 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -46,6 +46,7 @@ use crate::listeners::ALPN; use crate::protocols::l4::quic::sendto::{detect_gso, send_to, set_txtime_sockopt}; use crate::protocols::tls::{SslDigest, TlsRef}; use crate::protocols::{ConnectionState, Ssl}; +use pingora_boringssl::ssl::SslContextBuilder; // UDP header 8 bytes, IPv4 Header 20 bytes //pub const MAX_IPV4_BUF_SIZE: usize = 65507; @@ -58,7 +59,7 @@ pub const MAX_IPV6_BUF_SIZE: usize = 65487; pub const MAX_IPV6_UDP_PACKET_SIZE: usize = 1452; //pub const MAX_IPV4_QUIC_DATAGRAM_SIZE: usize = 1370; -// TODO: validate size (possibly 1200 is the standard) +// TODO: validate size (is 1200 the standard?) pub const MAX_IPV6_QUIC_DATAGRAM_SIZE: usize = 1350; /// initial size for the connection drop deque @@ -317,31 +318,9 @@ impl QuicHttp3Configs { })?; }; - quic.set_application_protos(h3::APPLICATION_PROTOCOL) - .explain_err(ErrorType::InternalError, |_| { - "Failed to set application protocols." 
- })?; - - quic.grease(false); // default true - - quic.set_max_idle_timeout(60 * 1000); // default ulimited - quic.set_max_recv_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // recv default is 65527 - quic.set_max_send_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // send default is 1200 - quic.set_initial_max_data(10_000_000); // 10 Mb - quic.set_initial_max_stream_data_bidi_local(1_000_000); // 1 Mb - quic.set_initial_max_stream_data_bidi_remote(1_000_000); // 1 Mb - quic.set_initial_max_stream_data_uni(1_000_000); // 1 Mb - quic.set_initial_max_streams_bidi(100); - quic.set_initial_max_streams_uni(100); - - quic.set_disable_active_migration(true); // default is false - - // quic.set_active_connection_id_limit(2); // default 2 - // quic.set_max_connection_window(conn_args.max_window); // default 24 Mb - // quic.set_max_stream_window(conn_args.max_stream_window); // default 16 Mb - - Ok(quic) + QuicHttp3Configs::set_quic_defaults(quic) } + pub fn new_quic_listener(cert_chain_pem_file: &str, priv_key_pem_file: &str) -> Result { let mut quic = Config::new(quiche::PROTOCOL_VERSION) .explain_err(ErrorType::InternalError, |_| { @@ -358,25 +337,21 @@ impl QuicHttp3Configs { "Could not load private key from pem file." })?; - // quic.load_verify_locations_from_file() for CA's - // quic.verify_peer(); default server = false; client = true - // quic.discover_pmtu(false); // default false - quic.grease(false); // default true - // quic.log_keys() && config.set_keylog(); // logging SSL secrets - // quic.set_ticket_key() // session ticket signer key material - - //config.enable_early_data(); // can lead to ZeroRTT headers during handshake + QuicHttp3Configs::set_quic_defaults(quic) + } + fn set_quic_defaults(mut quic: Config) -> Result { quic.set_application_protos(h3::APPLICATION_PROTOCOL) .explain_err(ErrorType::InternalError, |_| { "Failed to set application protocols." 
})?; + quic.grease(false); // default true - // quic.set_application_protos_wire_format(); - // quic.set_max_amplification_factor(3); // anti-amplification limit factor; default 3 + // TODO: usable for mTLS? + // quic.verify_peer(); default server = false; client = true quic.set_max_idle_timeout(60 * 1000); // default ulimited - quic.set_max_recv_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // recv default is 65527 + quic.set_max_recv_udp_payload_size(MAX_IPV6_BUF_SIZE); // recv default is 65527 quic.set_max_send_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // send default is 1200 quic.set_initial_max_data(10_000_000); // 10 Mb quic.set_initial_max_stream_data_bidi_local(1_000_000); // 1 Mb @@ -385,29 +360,6 @@ impl QuicHttp3Configs { quic.set_initial_max_streams_bidi(100); quic.set_initial_max_streams_uni(100); - // quic.set_ack_delay_exponent(3); // default 3 - // quic.set_max_ack_delay(25); // default 25 - // quic.set_active_connection_id_limit(2); // default 2 - // quic.set_disable_active_migration(false); // default false - - // quic.set_active_connection_id_limit(2); // default 2 - // quic.set_disable_active_migration(false); // default false - // quic.set_cc_algorithm_name("cubic"); // default cubic - // quic.set_initial_congestion_window_packets(10); // default 10 - // quic.set_cc_algorithm(CongestionControlAlgorithm::CUBIC); // default CongestionControlAlgorithm::CUBIC - - // quic.enable_hystart(true); // default true - // quic.enable_pacing(true); // default true - // quic.set_max_pacing_rate(); // default ulimited - - //config.enable_dgram(false); // default false - - // quic.set_path_challenge_recv_max_queue_len(3); // default 3 - // quic.set_max_connection_window(MAX_CONNECTION_WINDOW); // default 24 Mb - // quic.set_max_stream_window(MAX_STREAM_WINDOW); // default 16 Mb - // quic.set_stateless_reset_token(None) // default None - // quic.set_disable_dcid_reuse(false) // default false - Ok(quic) } @@ -452,6 +404,21 @@ impl QuicHttp3Configs { }) } + 
pub(crate) fn with_boring_ssl_ctx_builder(ctx_builder: SslContextBuilder) -> Result { + let quic = Config::with_boring_ssl_ctx_builder(quiche::PROTOCOL_VERSION, ctx_builder) + .explain_err(ErrorType::InternalError, |_| { + "Failed to create quiche config." + })?; + + let quic = QuicHttp3Configs::set_quic_defaults(quic)?; + + let http3 = QuicHttp3Configs::new_http3()?; + Ok(Self { + quic: Arc::new(Mutex::new(quic)), + http3: Arc::new(http3), + }) + } + pub fn quic(&self) -> &Arc> { &self.quic } @@ -615,7 +582,7 @@ impl Ssl for Connection { Connection::IncomingEstablished(s) => { let mut conn = s.connection.lock(); let conn = &mut *conn; - Some(Arc::from(SslDigest::from_ssl(conn.as_mut()))) + Some(Arc::from(SslDigest::from_quic_ssl(conn.as_mut()))) } _ => None, } @@ -648,7 +615,9 @@ impl AsRawFd for Connection { } } -#[allow(unused_variables)] // TODO: remove +// TODO: remove, ideally requirement for AsyncRead & AsyncWrite on Stream +// possibly switch to AsyncIO & IO and/or AsyncStream & Stream +#[allow(unused_variables)] impl AsyncWrite for Connection { fn poll_write( self: Pin<&mut Self>, diff --git a/pingora-core/src/protocols/tls/quic/client.rs b/pingora-core/src/protocols/tls/quic/client.rs index 656022be0..c74b2b28d 100644 --- a/pingora-core/src/protocols/tls/quic/client.rs +++ b/pingora-core/src/protocols/tls/quic/client.rs @@ -17,12 +17,13 @@ use crate::listeners::ALPN; use crate::protocols::l4::quic::connector::{ConnectionRx, EstablishedState, HandshakeState}; use crate::protocols::l4::quic::id_token::generate_outgoing_cid; -use crate::protocols::l4::quic::{handle_connection_errors, Connection, ConnectionTx, TxStats}; +use crate::protocols::l4::quic::{ + handle_connection_errors, Connection, ConnectionTx, QuicHttp3Configs, TxStats, +}; use crate::protocols::IO; use crate::upstreams::peer::Peer; use log::{info, trace}; use parking_lot::Mutex; -use pingora_boringssl::ssl::SslConnector; use pingora_error::ErrorType::HandshakeError; use pingora_error::{Error, 
ErrorType, OrErr}; use std::sync::Arc; @@ -32,7 +33,7 @@ pub(crate) async fn handshake( mut stream: T, peer: &P, alpn_override: Option, - tls_ctx: &SslConnector, + tls_ctx: &QuicHttp3Configs, ) -> pingora_error::Result where T: IO, @@ -74,7 +75,7 @@ pub(crate) async fn handshake_inner

( state: &mut HandshakeState, peer: &P, _alpn_override: Option, // potentially HTTP09 could be supported - _tls_ctx: &SslConnector, // currently the SslConnector cannot be used with quiche, might be feasible + quic_tls_ctx: &QuicHttp3Configs, ) -> pingora_error::Result where P: Peer + Send + Sync, @@ -82,9 +83,20 @@ where let HandshakeState { crypto, socket_details, - configs, } = state; + let mut peer_quic_http3_config = None; + if let Some(peer_options) = peer.get_peer_options() { + peer_quic_http3_config.clone_from(&peer_options.quic_http3_configs) + }; + + // use peer config in case present or fallback to tls_ctx from TransportConnector + let configs = if let Some(peer_quic_http3_config) = peer_quic_http3_config { + peer_quic_http3_config + } else { + quic_tls_ctx.clone() + }; + let conn_id = generate_outgoing_cid(&crypto.rng); let local_addr = socket_details.local_addr; diff --git a/pingora-core/src/protocols/tls/quic/mod.rs b/pingora-core/src/protocols/tls/quic/mod.rs index 7f39d4d98..9688d457a 100644 --- a/pingora-core/src/protocols/tls/quic/mod.rs +++ b/pingora-core/src/protocols/tls/quic/mod.rs @@ -16,3 +16,4 @@ pub mod client; pub mod server; +mod stream; diff --git a/pingora-core/src/protocols/tls/quic/stream.rs b/pingora-core/src/protocols/tls/quic/stream.rs new file mode 100644 index 000000000..d53a9ea64 --- /dev/null +++ b/pingora-core/src/protocols/tls/quic/stream.rs @@ -0,0 +1,78 @@ +// Copyright 2024 Cloudflare, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::protocols::tls::SslDigest; +use pingora_boringssl::{hash::MessageDigest, ssl::SslRef}; +use pingora_boringssl::{nid::Nid, x509::X509}; +use pingora_error::{ErrorType::*, OrErr, Result}; + +// enables rustls & quic-boringssl usage +// at the cost of some duplication + +impl SslDigest { + pub(crate) fn from_quic_ssl(ssl: &SslRef) -> Self { + let cipher = match ssl.current_cipher() { + Some(c) => c.name(), + None => "", + }; + + let (cert_digest, org, sn) = match ssl.peer_certificate() { + Some(cert) => { + let cert_digest = match cert.digest(MessageDigest::sha256()) { + Ok(c) => c.as_ref().to_vec(), + Err(_) => Vec::new(), + }; + (cert_digest, get_organization(&cert), get_serial(&cert).ok()) + } + None => (Vec::new(), None, None), + }; + + SslDigest { + cipher, + version: ssl.version_str(), + organization: org, + serial_number: sn, + cert_digest, + } + } +} + +fn get_subject_name(cert: &X509, name_type: Nid) -> Option { + cert.subject_name() + .entries_by_nid(name_type) + .next() + .map(|name| { + name.data() + .as_utf8() + .map(|s| s.to_string()) + .unwrap_or_default() + }) +} + +/// Return the organization associated with the X509 certificate. +pub fn get_organization(cert: &X509) -> Option { + get_subject_name(cert, Nid::ORGANIZATIONNAME) +} + +/// Return the serial number associated with the X509 certificate as a hexadecimal value. 
+pub fn get_serial(cert: &X509) -> Result { + let bn = cert + .serial_number() + .to_bn() + .or_err(InvalidCert, "Invalid serial")?; + let hex = bn.to_hex_str().or_err(InvalidCert, "Invalid serial")?; + + let hex_str: &str = hex.as_ref(); + Ok(hex_str.to_owned()) +} From 7b2f62c1651a8bcb8596e4974f195d78a20c2d87 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Thu, 30 Jan 2025 14:49:15 +0100 Subject: [PATCH 40/52] fix current_streams AtomicUnit overflow --- pingora-core/src/connectors/http/v3.rs | 63 ++++++++++++++++++- pingora-core/src/protocols/http/client.rs | 2 +- pingora-core/src/protocols/http/v3/client.rs | 12 ++-- .../src/protocols/l4/quic/connector.rs | 2 +- pingora-core/src/protocols/l4/quic/mod.rs | 2 + pingora-core/tests/test_basic.rs | 2 +- pingora-core/tests/utils/mod.rs | 2 +- 7 files changed, 73 insertions(+), 12 deletions(-) diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index eda19a8bf..836bdde7f 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -91,7 +91,6 @@ impl ConnectionRef { } pub(crate) fn drop_session(&self, stream_id: u64) { - self.0.current_streams.fetch_sub(1, Ordering::SeqCst); let mut drop_sessions = self.0.drop_sessions.lock(); drop_sessions.push_back(stream_id); } @@ -379,6 +378,7 @@ mod tests { use crate::upstreams::peer::HttpPeer; use bytes::{BufMut, BytesMut}; use http::Version; + use log::warn; use pingora_error::Result; use pingora_http::RequestHeader; @@ -505,4 +505,65 @@ mod tests { let h3_5 = connector.reused_http_session(&peer).await.unwrap().unwrap(); assert_eq!(id, h3_5.conn().id()); } + + #[tokio::test] + async fn test_connector_sequential_quic_http3() -> Result<()> { + let (_server_handle, mut peer) = quic_listener_peer()?; + peer.options.max_h3_streams = 100; + + let connector = Connector::new(None); + let sample_size = 1000; + for s in 0..sample_size { + let (mut session, r) = get_session(&connector, &peer).await?; + 
warn!("session acquired: {}/{} reused: {}", 0, s, r); + request(&mut session, &peer).await?; + } + Ok(()) + } + + async fn get_session(connector: &Connector, peer: &HttpPeer) -> Result<(HttpSession, bool)> { + if let Some(h3) = connector.reused_http_session(peer).await? { + warn!("reused session is some"); + Ok((HttpSession::H3(h3), true)) + } else { + let session = connector.new_http_session(peer).await?; + Ok((session, false)) + } + } + + async fn request(session: &mut HttpSession, peer: &HttpPeer) -> Result<()> { + let mut req = RequestHeader::build("GET", b"/", Some(3))?; + req.insert_header(http::header::HOST, peer.sni())?; + + let body_base = "hello world\n"; + let body_string = body_base.to_string(); + let mut body_send = BytesMut::new(); + body_send.put(body_string.as_bytes()); + + warn!("write_request_header"); + session.write_request_header(Box::new(req)).await?; + warn!("write_request_body"); + session + .write_request_body(body_send.freeze(), false) + .await?; + warn!("finish_request_body"); + session.finish_request_body().await?; + warn!("read_response_header"); + session.read_response_header().await?; + + let resp = session.response_header(); + assert!(resp.is_some()); + if let Some(resp) = resp { + assert_eq!(resp.status.as_str(), "200"); + assert_eq!(resp.version, Version::HTTP_3); + } + + let mut resp_body = BytesMut::new(); + while let Some(body) = session.read_response_body().await? 
{ + assert!(body.len() < MAX_IPV6_QUIC_DATAGRAM_SIZE * 64); + resp_body.put(body) + } + assert_eq!(resp_body.as_ref(), body_string.as_bytes()); + Ok(()) + } } diff --git a/pingora-core/src/protocols/http/client.rs b/pingora-core/src/protocols/http/client.rs index c4ede7603..aedb97082 100644 --- a/pingora-core/src/protocols/http/client.rs +++ b/pingora-core/src/protocols/http/client.rs @@ -61,7 +61,7 @@ impl HttpSession { Ok(()) } HttpSession::H2(h2) => h2.write_request_header(req, false), - HttpSession::H3(h3) => h3.write_request_header(req).await, + HttpSession::H3(h3) => h3.write_request_header(req), } } diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs index fdd05b9ff..6903069ae 100644 --- a/pingora-core/src/protocols/http/v3/client.rs +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -84,6 +84,7 @@ impl Http3Session { impl Drop for Http3Session { fn drop(&mut self) { // TODO: clarify if a RESET_STREAM should be sent + // drop session in case initialized if let Some(stream_id) = self.stream_id { self.conn.drop_session(stream_id); debug!( @@ -92,6 +93,7 @@ impl Drop for Http3Session { stream_id ) } + // always decrease counter self.conn.release_stream(); } } @@ -115,7 +117,7 @@ impl Http3Session { } /// Write the request header to the server - pub async fn write_request_header(&mut self, req: Box) -> Result<()> { + pub fn write_request_header(&mut self, req: Box) -> Result<()> { if self.request_header_written.is_some() { // cannot send again warn!("request not sent as session already sent a request"); @@ -123,17 +125,13 @@ impl Http3Session { } let headers = request_headers_to_event(&req)?; - self.send_request(&headers, false).await?; + self.send_request(&headers, false)?; self.request_header_written = Some(req); Ok(()) } - async fn send_request( - &mut self, - headers: &[T], - fin: bool, - ) -> Result { + fn send_request(&mut self, headers: &[T], fin: bool) -> Result { // sending the request creates the 
underlying quic stream & according stream id // it is not possible to check the stream capacity before sending the request let stream_id = { diff --git a/pingora-core/src/protocols/l4/quic/connector.rs b/pingora-core/src/protocols/l4/quic/connector.rs index 149b8c406..fcb4f81e1 100644 --- a/pingora-core/src/protocols/l4/quic/connector.rs +++ b/pingora-core/src/protocols/l4/quic/connector.rs @@ -135,8 +135,8 @@ impl ConnectionRx { match conn.recv(&mut buf[..size], recv_info) { Ok(_size) => { debug!("connection {:?} received {}", conn_id, size); - self.tx_notify.notify_waiters(); self.rx_notify.notify_waiters(); + self.tx_notify.notify_waiters(); } Err(e) => { // If an error occurs while processing data, the connection is closed with diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index 98013247a..e8414a02f 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -357,6 +357,8 @@ impl QuicHttp3Configs { quic.set_initial_max_stream_data_bidi_local(1_000_000); // 1 Mb quic.set_initial_max_stream_data_bidi_remote(1_000_000); // 1 Mb quic.set_initial_max_stream_data_uni(1_000_000); // 1 Mb + + // TODO: config through peer.options.max_h3_streams quic.set_initial_max_streams_bidi(100); quic.set_initial_max_streams_uni(100); diff --git a/pingora-core/tests/test_basic.rs b/pingora-core/tests/test_basic.rs index c8dcf1297..f5150eca6 100644 --- a/pingora-core/tests/test_basic.rs +++ b/pingora-core/tests/test_basic.rs @@ -153,7 +153,7 @@ async fn test_listener_quic_http3() -> Result<()> { assert_eq!(content_type, &b"text/html".to_vec()); assert_eq!(content_length, &body.len().to_string().as_bytes().to_vec()); assert_eq!(resp_body[0], body.as_slice().to_vec()); - + tokio::time::sleep(Duration::MAX).await; Ok(()) } diff --git a/pingora-core/tests/utils/mod.rs b/pingora-core/tests/utils/mod.rs index 4361474e2..07df068d9 100644 --- a/pingora-core/tests/utils/mod.rs +++ 
b/pingora-core/tests/utils/mod.rs @@ -100,7 +100,7 @@ fn entry_point(opt: Option) { let mut echo_service_http = Service::with_listeners("Echo Service HTTP".to_string(), listeners, EchoApp); - echo_service_http.threads = Some(4); + echo_service_http.threads = Some(1); my_server.add_service(echo_service_http); my_server.run_forever(); From c487dca0c5fbe69ca3fa1a690b8d2abad1f1249c Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Thu, 30 Jan 2025 15:32:31 +0100 Subject: [PATCH 41/52] fix race data/timeout issue during high load --- pingora-core/src/connectors/http/v3.rs | 55 ++++++++++++++++--- pingora-core/src/protocols/http/v3/client.rs | 14 +++-- pingora-core/src/protocols/http/v3/mod.rs | 33 +++++++++-- pingora-core/src/protocols/http/v3/server.rs | 9 ++- pingora-core/src/protocols/l4/quic/mod.rs | 2 +- pingora-core/src/protocols/tls/quic/client.rs | 4 +- pingora-core/tests/test_basic.rs | 10 ++-- 7 files changed, 101 insertions(+), 26 deletions(-) diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index 836bdde7f..730a22a16 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -381,6 +381,9 @@ mod tests { use log::warn; use pingora_error::Result; use pingora_http::RequestHeader; + use tokio::task::JoinSet; + + const ITER_SIZE: usize = 128; #[tokio::test] async fn test_listener_connector_quic_http3() -> Result<()> { @@ -512,15 +515,44 @@ mod tests { peer.options.max_h3_streams = 100; let connector = Connector::new(None); - let sample_size = 1000; - for s in 0..sample_size { + for s in 0..ITER_SIZE * ITER_SIZE { let (mut session, r) = get_session(&connector, &peer).await?; - warn!("session acquired: {}/{} reused: {}", 0, s, r); + debug!("session acquired: {}/{} reused: {}", 0, s, r); request(&mut session, &peer).await?; } Ok(()) } + #[tokio::test] + async fn test_connector_parallel_quic_http3() -> Result<()> { + let (_server_handle, mut peer) = quic_listener_peer()?; + 
peer.options.max_h3_streams = 10; + + let mut joinset = JoinSet::>::new(); + for c in 0..ITER_SIZE { + let peer = peer.clone(); + joinset.spawn(async move { + let connector = Connector::new(None); + for _s in 0..ITER_SIZE { + let (mut session, _r) = get_session(&connector, &peer).await?; + request(&mut session, &peer).await?; + } + Ok(c) + }); + } + + let mut seen = [false; ITER_SIZE]; + while let Some(res) = joinset.join_next().await { + let idx = res.unwrap().unwrap(); + seen[idx] = true; + } + + for task in seen.iter() { + assert!(task); + } + Ok(()) + } + async fn get_session(connector: &Connector, peer: &HttpPeer) -> Result<(HttpSession, bool)> { if let Some(h3) = connector.reused_http_session(peer).await? { warn!("reused session is some"); @@ -540,16 +572,25 @@ mod tests { let mut body_send = BytesMut::new(); body_send.put(body_string.as_bytes()); - warn!("write_request_header"); + debug!("write_request_header"); session.write_request_header(Box::new(req)).await?; - warn!("write_request_body"); + let h3_session = session.as_http3().unwrap(); + let conn = h3_session.conn(); + let stream_id = session.as_http3().unwrap().stream_id()?; + + debug!( + "connection={:?} stream={}", + conn.conn_id(), + h3_session.stream_id()? + ); session .write_request_body(body_send.freeze(), false) .await?; - warn!("finish_request_body"); + debug!("write_request_body"); session.finish_request_body().await?; - warn!("read_response_header"); + debug!("finish_request_body {}", stream_id); session.read_response_header().await?; + debug!("read_response_header"); let resp = session.response_header(); assert!(resp.is_some()); diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs index 6903069ae..47ef8d14a 100644 --- a/pingora-core/src/protocols/http/v3/client.rs +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -198,7 +198,9 @@ impl Http3Session { }; let read_timeout = self.read_timeout; - tokio::select! { + tokio::select! 
{ /* biased, poll data first */ + // to avoid timeout race wins in high load scenarios when data could be available + biased; res = headers_event(self.stream_id()?, self.event_rx()?) => { let (headers, _) = res?; let map = event_to_response_headers(&headers)?; @@ -212,7 +214,7 @@ impl Http3Session { Ok(()) } - fn stream_id(&self) -> Result { + pub(crate) fn stream_id(&self) -> Result { let Some(stream_id) = self.stream_id else { return Err(Error::explain(H3Error, "stream id not present")); }; @@ -235,7 +237,9 @@ impl Http3Session { } let read_timeout = self.read_timeout; - tokio::select! { + tokio::select! { /* biased, poll data first */ + // to avoid timeout race wins in high load scenarios when data could be available + biased; res = async { if !self.read_continue { data_finished_event(self.stream_id()?, self.event_rx()?).await @@ -316,7 +320,9 @@ impl Http3Session { // only possible when supported by the version of HTTP in use and enabled by an explicit // framing mechanism let read_timeout = self.read_timeout; - let trailer_map = tokio::select! { + let trailer_map = tokio::select! { /* biased, poll data first */ + // to avoid timeout race wins in high load scenarios when data could be available + biased; res = headers_event(self.stream_id()?, self.event_rx()?) => { let (trailers, _) = res?; headervec_to_headermap(&trailers)? diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index 050c475f1..ddf3b5a0f 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -169,6 +169,11 @@ impl ConnectionIo { debug_assert_eq!(sent_len, data.len()); if end { + trace!( + "connection {:?} sent FIN flag for stream id {}", + self.conn_id(), + stream_id + ); self.tx_notify.notify_waiters(); } @@ -193,7 +198,11 @@ impl ConnectionIo { |e| format! {"Writing h3 request body finished to downstream failed. 
{e}"}, )?; self.tx_notify.notify_waiters(); - trace!("sent FIN flag for stream id {}", stream_id); + trace!( + "connection {:?} sent FIN flag for stream id {}", + self.conn_id(), + stream_id + ); Ok(()) } @@ -211,11 +220,15 @@ impl ConnectionIo { break true; } } - Err(quiche::h3::Error::Done) => { + Err(h3::Error::Done) => { // poll for next Http3 event // Event::Finished is only emitted after recv_body is Done self.rx_notify.notify_waiters(); - trace!("read_body done"); + trace!( + "connection {:?} reading body for stream {} done", + self.conn_id(), + stream_id + ); break false; } Err(e) => { @@ -274,6 +287,9 @@ impl ConnectionIo { D: FnMut(&mut StreamIdHashMap>), A: FnMut(&mut StreamIdHashMap>), { + // register before housekeeping to avoid notify misses in high-load scenarios + let data_future = self.rx_notify.notified(); + match error { h3::Error::Done => { debug!("H3 connection {:?} no events available", self.conn_id()); @@ -314,8 +330,10 @@ impl ConnectionIo { } // race for new data on connection or timeout - tokio::select! { - _data = self.rx_notify.notified() => { /* continue */ } + tokio::select! 
{ /* biased, poll data first */ + // to avoid timeout race wins in high load scenarios when data could be available + biased; + _data = data_future => { /* continue */ } _timedout = async { if let Some(timeout) = timeout { debug!("connection {:?} timeout {:?}", self.conn_id(), timeout); @@ -567,7 +585,10 @@ async fn data_finished_event(stream_id: u64, event_rx: &mut Receiver) -> None => { return Err(Error::explain( ReadError, - "H3 session event channel disconnected", + format!( + "H3 session event channel disconnected fn {} stream {}", + "data_finished_event", stream_id + ), )) } } diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index 1cb6b1e1a..2f6628842 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -342,7 +342,9 @@ impl Http3Session { ) }; - conn.max_accepted_stream_id = session.stream_id; + if session.stream_id > conn.max_accepted_stream_id { + conn.max_accepted_stream_id = session.stream_id; + } return Ok(Some(session)); } } @@ -592,7 +594,10 @@ impl Http3Session { None => { return Err(Error::explain( ReadError, - "H3 session event channel disconnected", + format!( + "H3 session event channel disconnected fn {} stream {}", + "reset_event", self.stream_id + ), )) } } diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index e8414a02f..f2d8709c0 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -350,7 +350,7 @@ impl QuicHttp3Configs { // TODO: usable for mTLS? 
// quic.verify_peer(); default server = false; client = true - quic.set_max_idle_timeout(60 * 1000); // default ulimited + quic.set_max_idle_timeout(5 * 1000); // default ulimited quic.set_max_recv_udp_payload_size(MAX_IPV6_BUF_SIZE); // recv default is 65527 quic.set_max_send_udp_payload_size(MAX_IPV6_QUIC_DATAGRAM_SIZE); // send default is 1200 quic.set_initial_max_data(10_000_000); // 10 Mb diff --git a/pingora-core/src/protocols/tls/quic/client.rs b/pingora-core/src/protocols/tls/quic/client.rs index c74b2b28d..029a0e76a 100644 --- a/pingora-core/src/protocols/tls/quic/client.rs +++ b/pingora-core/src/protocols/tls/quic/client.rs @@ -22,7 +22,7 @@ use crate::protocols::l4::quic::{ }; use crate::protocols::IO; use crate::upstreams::peer::Peer; -use log::{info, trace}; +use log::{debug, trace}; use parking_lot::Mutex; use pingora_error::ErrorType::HandshakeError; use pingora_error::{Error, ErrorType, OrErr}; @@ -121,7 +121,7 @@ where format!("failed to generate initial handshake packet {:?}", e) })? 
}; - info!( + debug!( "connection {:?} outgoing from {:} to {:}", conn_id, local_addr, peer_addr ); diff --git a/pingora-core/tests/test_basic.rs b/pingora-core/tests/test_basic.rs index f5150eca6..054c5cb90 100644 --- a/pingora-core/tests/test_basic.rs +++ b/pingora-core/tests/test_basic.rs @@ -80,9 +80,10 @@ async fn test_listener_quic_http3() -> Result<()> { utils::init(); info!("Startup completed.."); + let host = "openrusty.org"; let config = Config::new() .with_connect_to("127.0.0.1:6147".to_string()) - .with_host_port("openrusty.org:6147".to_string()) + .with_host_port(format!("{}:6147", host)) .with_idle_timeout(2000) .verify_peer(false) .build() @@ -92,7 +93,7 @@ async fn test_listener_quic_http3() -> Result<()> { let headers = vec![ Header::new(b":method", b"POST"), Header::new(b":scheme", b"https"), - Header::new(b":authority", b"openrusty.org"), + Header::new(b":authority", host.as_bytes()), Header::new(b":path", b"/"), Header::new(b"content-length", body.len().to_string().as_bytes()), ]; @@ -162,9 +163,10 @@ async fn test_listener_quic_http3_timeout() -> Result<()> { utils::init(); info!("Startup completed.."); + let host = "openrusty.org"; let config = Config::new() .with_connect_to("127.0.0.1:6147".to_string()) - .with_host_port("openrusty.org:6147".to_string()) + .with_host_port(format!("{}:6147", host)) .with_idle_timeout(3000) .verify_peer(false) .build() @@ -174,7 +176,7 @@ async fn test_listener_quic_http3_timeout() -> Result<()> { let headers = vec![ Header::new(b":method", b"POST"), Header::new(b":scheme", b"https"), - Header::new(b":authority", b"openrusty.org"), + Header::new(b":authority", host.as_bytes()), Header::new(b":path", b"/"), Header::new(b"content-length", body.len().to_string().as_bytes()), ]; From b33c18270965a9fd478640c7bafaa50efec3a2d6 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Thu, 30 Jan 2025 15:50:37 +0100 Subject: [PATCH 42/52] add testcase and histograms needs cargo --nocapture as plot library uses println --- 
pingora-core/Cargo.toml | 2 + pingora-core/src/connectors/http/v3.rs | 162 +++++++++++++++++++++++-- pingora-core/src/connectors/l4.rs | 2 +- pingora-core/src/connectors/mod.rs | 14 ++- 4 files changed, 163 insertions(+), 17 deletions(-) diff --git a/pingora-core/Cargo.toml b/pingora-core/Cargo.toml index ace853cbb..3e7e3079b 100644 --- a/pingora-core/Cargo.toml +++ b/pingora-core/Cargo.toml @@ -88,6 +88,8 @@ h3i = { git = 'https://github.com/cloudflare/quiche.git', rev = "5d2031ca" } [target.'cfg(unix)'.dev-dependencies] hyperlocal = "0.8" jemallocator = "0.5" +histogram = "0.11" +textplots = "0.8" [features] default = ["boringssl"] diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index 730a22a16..cfb3fbaa8 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -377,17 +377,20 @@ mod tests { use crate::protocols::l4::quic::MAX_IPV6_QUIC_DATAGRAM_SIZE; use crate::upstreams::peer::HttpPeer; use bytes::{BufMut, BytesMut}; + use histogram::{AtomicHistogram, Histogram}; use http::Version; - use log::warn; + use log::info; use pingora_error::Result; use pingora_http::RequestHeader; + use std::time::Instant; + use textplots::{Chart, Plot, Shape}; use tokio::task::JoinSet; - const ITER_SIZE: usize = 128; + const ITER_SIZE: usize = 32; #[tokio::test] async fn test_listener_connector_quic_http3() -> Result<()> { - let (_server_handle, peer) = quic_listener_peer()?; + let (_server_handle, peer) = quic_listener_peer().await?; let connector = Connector::new(None); let mut session = connector.new_http_session(&peer).await?; @@ -509,33 +512,122 @@ mod tests { assert_eq!(id, h3_5.conn().id()); } - #[tokio::test] + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn test_connector_sequential_quic_http3() -> Result<()> { - let (_server_handle, mut peer) = quic_listener_peer()?; + let (_server_handle, mut peer) = quic_listener_peer().await?; peer.options.max_h3_streams = 100; + 
let mut req_counter = 0usize; + let mut histogram = + Histogram::new(7, 32).explain_err(InternalError, |_| "failed to crate histogram")?; + let timing = Instant::now(); + let connector = Connector::new(None); - for s in 0..ITER_SIZE * ITER_SIZE { + for s in 0..ITER_SIZE.pow(3) { + let timer = Instant::now(); let (mut session, r) = get_session(&connector, &peer).await?; debug!("session acquired: {}/{} reused: {}", 0, s, r); request(&mut session, &peer).await?; + let time_taken = timer.elapsed().as_micros() as u64; + histogram + .add(time_taken, 1) + .explain_err(InternalError, |_| "failed to add to histogram")?; + req_counter += 1; } + assert_eq!(req_counter, ITER_SIZE.pow(3)); + + let diff = timing.elapsed(); + info!("successful requests {}", req_counter); + info!("total duration {} milli seconds", diff.as_millis()); + print_histogram(histogram)?; Ok(()) } - #[tokio::test] + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn test_connector_parallel_quic_http3() -> Result<()> { - let (_server_handle, mut peer) = quic_listener_peer()?; + let (_server_handle, mut peer) = quic_listener_peer().await?; peer.options.max_h3_streams = 10; + let req_counter = Arc::new(AtomicUsize::new(0)); + let histogram = Arc::new( + AtomicHistogram::new(7, 32) + .explain_err(InternalError, |_| "failed to crate histogram")?, + ); + let timing = Instant::now(); + + let mut joinset = JoinSet::>::new(); + for c in 0..ITER_SIZE / 4 { + let peer = peer.clone(); + let req_counter = req_counter.clone(); + let histogram = histogram.clone(); + joinset.spawn(async move { + let connector = Connector::new(None); + for _s in 0..ITER_SIZE * ITER_SIZE { + let timer = Instant::now(); + // always use a new connection + let mut session = connector.new_http_session(&peer).await?; + request(&mut session, &peer).await?; + + let time_taken = timer.elapsed().as_micros() as u64; + histogram + .add(time_taken, 1) + .explain_err(InternalError, |_| "failed to add to histogram")?; + 
req_counter.fetch_add(1, Ordering::SeqCst); + } + Ok(c) + }); + } + + let mut seen = [false; ITER_SIZE / 4]; + while let Some(res) = joinset.join_next().await { + let idx = res.unwrap().unwrap(); + seen[idx] = true; + } + let diff = timing.elapsed(); + + for task in seen.iter() { + assert!(task); + } + + let req_counter = req_counter.load(Ordering::SeqCst); + //assert_eq!(req_counter, ITER_SIZE * ITER_SIZE * 2); + + info!("successful requests {}", req_counter); + info!("total duration {} milli seconds", diff.as_millis()); + + let histogram = histogram.load(); + print_histogram(histogram)?; + Ok(()) + } + + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn test_connector_parallel_sequential_quic_http3() -> Result<()> { + let (_server_handle, mut peer) = quic_listener_peer().await?; + peer.options.max_h3_streams = 10; + + let req_counter = Arc::new(AtomicUsize::new(0)); + let histogram = Arc::new( + AtomicHistogram::new(7, 32) + .explain_err(InternalError, |_| "failed to crate histogram")?, + ); + let timing = Instant::now(); + let mut joinset = JoinSet::>::new(); for c in 0..ITER_SIZE { let peer = peer.clone(); + let req_counter = req_counter.clone(); + let histogram = histogram.clone(); joinset.spawn(async move { let connector = Connector::new(None); - for _s in 0..ITER_SIZE { + for _s in 0..ITER_SIZE.pow(2) { + let timer = Instant::now(); let (mut session, _r) = get_session(&connector, &peer).await?; request(&mut session, &peer).await?; + let time_taken = timer.elapsed().as_micros() as u64; + histogram + .add(time_taken, 1) + .explain_err(InternalError, |_| "failed to add to histogram")?; + req_counter.fetch_add(1, Ordering::SeqCst); } Ok(c) }); @@ -546,16 +638,66 @@ mod tests { let idx = res.unwrap().unwrap(); seen[idx] = true; } + let diff = timing.elapsed(); for task in seen.iter() { assert!(task); } + + let req_counter = req_counter.load(Ordering::SeqCst); + assert_eq!(req_counter, ITER_SIZE.pow(3)); + + info!("successful requests {}", 
req_counter); + info!("total duration {} milli seconds", diff.as_millis()); + + let histogram = histogram.load(); + print_histogram(histogram)?; + Ok(()) + } + + // requires cargo --nocapture + fn print_histogram(histogram: Histogram) -> Result<()> { + let log_percentiles = [50f64, 75f64, 80f64, 90f64, 95f64, 99f64]; + let mut percentile_values = vec![]; + for i in 1..100 { + percentile_values.push(f64::from(i)) + } + + let percentiles = histogram + .percentiles(percentile_values.as_slice()) + .explain_err(InternalError, |_| "failed to generate percentiles")? + .unwrap(); + + let mut points_duration = vec![]; + let mut points_amount = vec![]; + info!("percentiles:"); + for (percentile, bucket) in percentiles { + let range_start = *bucket.range().start() as f32 / 1000f32; + let range_end = *bucket.range().end() as f32 / 1000f32; + + if log_percentiles.contains(&percentile) { + info!("{}th = {}ms - {}ms", percentile, range_start, range_end) + } + + points_duration.push((percentile as f32, (range_start + range_end) / 2f32)); + points_amount.push((percentile as f32, bucket.count() as f32)); + } + + println!("x = percentiles, y = milliseconds"); + Chart::new(200, 100, 0.0, 100.0) + .lineplot(&Shape::Lines(&points_duration)) + .nice(); + + println!("x = percentiles, y = requests"); + Chart::new(200, 100, 0.0, 100.0) + .lineplot(&Shape::Lines(&points_amount)) + .nice(); + Ok(()) } async fn get_session(connector: &Connector, peer: &HttpPeer) -> Result<(HttpSession, bool)> { if let Some(h3) = connector.reused_http_session(peer).await? 
{ - warn!("reused session is some"); Ok((HttpSession::H3(h3), true)) } else { let session = connector.new_http_session(peer).await?; diff --git a/pingora-core/src/connectors/l4.rs b/pingora-core/src/connectors/l4.rs index 0b5cf9f45..8ab3c724e 100644 --- a/pingora-core/src/connectors/l4.rs +++ b/pingora-core/src/connectors/l4.rs @@ -675,7 +675,7 @@ mod quic_tests { #[tokio::test] async fn test_connector_quic_handshake() -> Result<()> { - let (_server_handle, peer) = quic_listener_peer()?; + let (_server_handle, peer) = quic_listener_peer().await?; let mut pre_handshake_stream = connect(&peer, None).await?; assert!(pre_handshake_stream.quic_connection_state().is_some()); diff --git a/pingora-core/src/connectors/mod.rs b/pingora-core/src/connectors/mod.rs index 364ddbd81..4d5679568 100644 --- a/pingora-core/src/connectors/mod.rs +++ b/pingora-core/src/connectors/mod.rs @@ -592,13 +592,14 @@ pub(crate) mod quic_tests { use std::thread::JoinHandle; use std::time::Duration; - pub(crate) fn quic_listener_peer() -> Result<(JoinHandle<()>, HttpPeer)> { + pub(crate) async fn quic_listener_peer() -> Result<(JoinHandle<()>, HttpPeer)> { + env_logger::builder() + .format_timestamp(Some(env_logger::TimestampPrecision::Nanos)) + .init(); + + info!("Starting listener..."); let port = 6147u16; fn inner(port: u16) { - env_logger::builder() - .format_timestamp(Some(env_logger::TimestampPrecision::Nanos)) - .init(); - let cert_path = format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR")); let key_path = format!("{}/tests/keys/key.pem", env!("CARGO_MANIFEST_DIR")); @@ -627,7 +628,8 @@ pub(crate) mod quic_tests { ); peer.options.set_http_version(3, 3); - info!("Startup completed.."); + tokio::time::sleep(Duration::from_millis(500)).await; + info!("Startup completed."); Ok((server_handle, peer)) } From f0a3125ffbe9701b9f4bd05d2a6524599589feac Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Thu, 30 Jan 2025 17:35:52 +0100 Subject: [PATCH 43/52] fix handshake establishing 
locking --- pingora-core/src/connectors/http/v3.rs | 73 +++++++++++---- pingora-core/src/connectors/mod.rs | 4 +- pingora-core/src/protocols/http/v3/client.rs | 9 +- pingora-core/src/protocols/http/v3/mod.rs | 4 +- .../src/protocols/l4/quic/listener.rs | 88 ++++++++----------- pingora-core/src/protocols/l4/quic/mod.rs | 40 ++------- pingora-core/src/protocols/tls/quic/server.rs | 48 ++++++++-- 7 files changed, 151 insertions(+), 115 deletions(-) diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index cfb3fbaa8..f48559fbf 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -379,14 +379,15 @@ mod tests { use bytes::{BufMut, BytesMut}; use histogram::{AtomicHistogram, Histogram}; use http::Version; - use log::info; - use pingora_error::Result; + use log::{error, info}; + use pingora_error::{ErrorType, Result}; use pingora_http::RequestHeader; + use std::net::SocketAddr; use std::time::Instant; use textplots::{Chart, Plot, Shape}; use tokio::task::JoinSet; - const ITER_SIZE: usize = 32; + const ITER_SIZE: usize = 42; #[tokio::test] async fn test_listener_connector_quic_http3() -> Result<()> { @@ -549,6 +550,7 @@ mod tests { peer.options.max_h3_streams = 10; let req_counter = Arc::new(AtomicUsize::new(0)); + let failed_req_counter = Arc::new(AtomicUsize::new(0)); let histogram = Arc::new( AtomicHistogram::new(7, 32) .explain_err(InternalError, |_| "failed to crate histogram")?, @@ -556,17 +558,34 @@ mod tests { let timing = Instant::now(); let mut joinset = JoinSet::>::new(); - for c in 0..ITER_SIZE / 4 { + for c in 0..ITER_SIZE { let peer = peer.clone(); let req_counter = req_counter.clone(); + let failed_req_counter = failed_req_counter.clone(); let histogram = histogram.clone(); joinset.spawn(async move { - let connector = Connector::new(None); + let mut options = ConnectorOptions::new(128); + let socket_addr: SocketAddr = format!("127.0.0.{}:0", c) + .parse() + 
.explain_err(ErrorType::BindError, |_| "failed to parse socket addr")?; + options.bind_to_v4 = vec![socket_addr]; + let connector = Connector::new(Some(options)); + for _s in 0..ITER_SIZE * ITER_SIZE { let timer = Instant::now(); // always use a new connection - let mut session = connector.new_http_session(&peer).await?; - request(&mut session, &peer).await?; + let mut session = match connector.new_http_session(&peer).await { + Ok(session) => session, + Err(e) => { + failed_req_counter.fetch_add(1, Ordering::SeqCst); + error!("{}", e); + continue; + } + }; + match request(&mut session, &peer).await { + Ok(_) => req_counter.fetch_add(1, Ordering::SeqCst), + Err(_) => failed_req_counter.fetch_add(1, Ordering::SeqCst), + }; let time_taken = timer.elapsed().as_micros() as u64; histogram @@ -578,7 +597,7 @@ mod tests { }); } - let mut seen = [false; ITER_SIZE / 4]; + let mut seen = [false; ITER_SIZE]; while let Some(res) = joinset.join_next().await { let idx = res.unwrap().unwrap(); seen[idx] = true; @@ -590,9 +609,11 @@ mod tests { } let req_counter = req_counter.load(Ordering::SeqCst); + let failed_req_counter = failed_req_counter.load(Ordering::SeqCst); //assert_eq!(req_counter, ITER_SIZE * ITER_SIZE * 2); info!("successful requests {}", req_counter); + info!("failed requests {}", failed_req_counter); info!("total duration {} milli seconds", diff.as_millis()); let histogram = histogram.load(); @@ -714,26 +735,42 @@ mod tests { let mut body_send = BytesMut::new(); body_send.put(body_string.as_bytes()); - debug!("write_request_header"); - session.write_request_header(Box::new(req)).await?; let h3_session = session.as_http3().unwrap(); let conn = h3_session.conn(); + debug!("connection={:?} write_request_header", conn.conn_id()); + session.write_request_header(Box::new(req)).await?; let stream_id = session.as_http3().unwrap().stream_id()?; - debug!( - "connection={:?} stream={}", + "connection={:?} stream={} write_request_body", conn.conn_id(), - h3_session.stream_id()? 
+ stream_id ); session .write_request_body(body_send.freeze(), false) .await?; - debug!("write_request_body"); + debug!( + "connection={:?} stream={} finish_request_body", + conn.conn_id(), + stream_id + ); session.finish_request_body().await?; - debug!("finish_request_body {}", stream_id); - session.read_response_header().await?; - debug!("read_response_header"); - + debug!( + "connection={:?} stream={} read_response_header", + conn.conn_id(), + stream_id + ); + match session.read_response_header().await { + Ok(_) => {} + Err(e) => { + error!("{}", e); + return Err(e); + } + }; + debug!( + "connection={:?} stream={} response_header", + conn.conn_id(), + stream_id + ); let resp = session.response_header(); assert!(resp.is_some()); if let Some(resp) = resp { diff --git a/pingora-core/src/connectors/mod.rs b/pingora-core/src/connectors/mod.rs index 4d5679568..f6ff28cfe 100644 --- a/pingora-core/src/connectors/mod.rs +++ b/pingora-core/src/connectors/mod.rs @@ -138,7 +138,7 @@ pub struct TransportConnector { preferred_http_version: PreferredHttpVersion, } -const DEFAULT_POOL_SIZE: usize = 128; +const DEFAULT_POOL_SIZE: usize = 16; impl TransportConnector { /// Create a new [TransportConnector] with the given [ConnectorOptions] @@ -611,7 +611,7 @@ pub(crate) mod quic_tests { let mut echo_service_http = Service::with_listeners("Echo Service HTTP".to_string(), listeners, EchoApp); - echo_service_http.threads = Some(4); + echo_service_http.threads = Some(1); my_server.add_service(echo_service_http); my_server.run_forever(); diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs index 47ef8d14a..ca545ab5a 100644 --- a/pingora-core/src/protocols/http/v3/client.rs +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -440,7 +440,10 @@ async fn headers_event( None => { return Err(Error::explain( ReadError, - "H3 session event channel disconnected", + format!( + "H3 session event channel disconnected fn {} stream {}", + 
"headers_event", stream_id + ), )) } } @@ -544,11 +547,11 @@ fn housekeeping_add_sessions( "connection {:?} stream {} was already present in sessions", conn_id, stream_id ); - debug_assert!(false) + debug_assert!(false, "session already present") } None => { debug!( - "connection {:?} added stream id {} to sessions", + "connection {:?} added stream {} to sessions", conn_id, stream_id ) } diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index ddf3b5a0f..dd3b8f06b 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -170,7 +170,7 @@ impl ConnectionIo { if end { trace!( - "connection {:?} sent FIN flag for stream id {}", + "connection {:?} sent FIN flag for stream {} body", self.conn_id(), stream_id ); @@ -199,7 +199,7 @@ impl ConnectionIo { )?; self.tx_notify.notify_waiters(); trace!( - "connection {:?} sent FIN flag for stream id {}", + "connection {:?} sent FIN flag for stream {} finish", self.conn_id(), stream_id ); diff --git a/pingora-core/src/protocols/l4/quic/listener.rs b/pingora-core/src/protocols/l4/quic/listener.rs index d86b65061..d86b0f777 100644 --- a/pingora-core/src/protocols/l4/quic/listener.rs +++ b/pingora-core/src/protocols/l4/quic/listener.rs @@ -147,18 +147,7 @@ impl Listener { debug!("endpoint rx loop"); 'read: loop { // receive from network and parse Quic header - let (size, from) = match self.socket_details.io.try_recv_from(&mut rx_buf) { - Ok((size, from)) => (size, from), - Err(e) => { - if e.kind() == ErrorKind::WouldBlock { - // no more UDP packets to read for now, wait for new packets - self.socket_details.io.readable().await?; - continue 'read; - } else { - return Err(e); - } - } - }; + let (size, from) = self.socket_details.io.recv_from(&mut rx_buf).await?; // cleanup connections { @@ -196,7 +185,6 @@ impl Listener { }; let mut conn_id = header.dcid.clone(); - let mut udp_tx = None; let mut established_handle = None; // send to 
corresponding connection let mut handle; @@ -221,28 +209,47 @@ impl Listener { let mut needs_establish = None; match handle { IncomingConnectionHandle::Handshake(i) => { - let resp; { - resp = i.response.lock().take(); - } - if let Some(resp) = resp { - match resp { - HandshakeResponse::Established(e) => { - debug!( - "connection {:?} received HandshakeResponse::Established", - conn_id - ); - established_handle = Some(e.clone()); - needs_establish = Some(e); + // hold on to the lock while writing to udp_tx to avoid dropping packets + // during establishing the connection + let mut resp = i.response.lock(); + if let Some(resp) = resp.take() { + match resp { + HandshakeResponse::Established(e) => { + debug!( + "connection {:?} received HandshakeResponse::Established", + conn_id + ); + // receive data into existing connection + established_handle = Some(e.clone()); + needs_establish = Some(e); + } + HandshakeResponse::Ignored => { + // drop connection + //self.connections.remove(&header.dcid); + let mut drop_connections = self.drop_connections.lock(); + drop_connections.push_back(header.dcid); + continue 'read; + } } - HandshakeResponse::Ignored => { - // drop connection - self.connections.remove(&header.dcid); - continue 'read; + } else { + // receive data on UDP channel + // use try_send as sync method to avoid await point while holding lock + match i.udp_tx + .try_send(UdpRecv { + pkt: rx_buf[..size].to_vec(), + header, + recv_info, + }) + { + Ok(()) => {} + Err(e) => warn!( + "sending dgram to connection {:?} failed with error: {}, dropping dgram", + conn_id, e + ), } + continue 'read; } - } else { - udp_tx = Some(i.udp_tx.clone()); } } IncomingConnectionHandle::Established(e) => { @@ -275,25 +282,6 @@ impl Listener { } } - // receive data on UDP channel - if let Some(udp_tx) = udp_tx { - match udp_tx - .send(UdpRecv { - pkt: rx_buf[..size].to_vec(), - header, - recv_info, - }) - .await - { - Ok(()) => {} - Err(e) => warn!( - "sending dgram to connection {:?} 
failed with error: {}", - conn_id, e - ), - } - continue 'read; - } - if header.ty != Type::Initial { debug!( "Quic packet type is not \"Initial\". Header: {:?}. Continuing...", diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index f2d8709c0..8f20bca3f 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -34,7 +34,6 @@ use std::task::{Context, Poll}; use std::{io, mem}; use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; use tokio::net::UdpSocket; -use tokio::sync::mpsc::error::TryRecvError; use tokio::sync::Notify; pub(crate) mod connector; @@ -462,38 +461,15 @@ impl Connection { } match self { Connection::IncomingHandshake(s) => { - 'drain: loop { - match s.udp_rx.try_recv() { - Ok(mut dgram) => { - let mut conn = state.connection.lock(); - conn.recv(dgram.pkt.as_mut_slice(), dgram.recv_info) - .explain_err(ErrorType::HandshakeError, |_| { - "receiving dgram failed" - })?; - debug!( - "connection {:?} dgram received while establishing", - s.connection_id - ) - } - Err(e) => { - match e { - TryRecvError::Empty => { - // stop accepting packets - s.udp_rx.close(); - } - TryRecvError::Disconnected => { - // remote already closed channel - } - } - break 'drain; - } - } + if !s.udp_rx.is_empty() { + error!( + "connection {:?} established udp_rx={}", + state.connection_id, + s.udp_rx.len() + ); + } else { + debug!("connection {:?} established", state.connection_id); } - debug_assert!( - s.udp_rx.is_empty(), - "udp rx channel must be empty when establishing the connection" - ); - debug!("connection {:?} established", state.connection_id); let _ = mem::replace(self, Connection::IncomingEstablished(state)); Ok(()) } diff --git a/pingora-core/src/protocols/tls/quic/server.rs b/pingora-core/src/protocols/tls/quic/server.rs index c8345bd32..4dc72ad73 100644 --- a/pingora-core/src/protocols/tls/quic/server.rs +++ b/pingora-core/src/protocols/tls/quic/server.rs @@ -30,6 +30,7 @@ 
use quiche::ConnectionId; use std::net::SocketAddr; use std::sync::Arc; use tokio::net::UdpSocket; +use tokio::sync::mpsc::error::TryRecvError; use tokio::sync::Notify; pub(crate) async fn handshake(mut stream: L4Stream) -> pingora_error::Result { @@ -298,11 +299,8 @@ async fn handshake_inner( let tx_notify = Arc::new(Notify::new()); let rx_notify = Arc::new(Notify::new()); - debug!( - "connection {:?} handshake successful, udp_rx {}", - conn_id, - udp_rx.len() - ); + debug!("connection {:?} handshake successful", conn_id); + let handle = EstablishedHandle { connection_id: conn_id.clone(), connection: connection.clone(), @@ -311,9 +309,45 @@ async fn handshake_inner( }; { + // hold the lock while draining the channel to avoid pkt receiving issues during establishing the handle let mut resp = response.lock(); + 'drain: loop { + if !udp_rx.is_empty() { + error!( + "connection {:?} established udp_rx {}", + conn_id, + udp_rx.len() + ); + } + match udp_rx.try_recv() { + Ok(mut dgram) => { + let mut conn = connection.lock(); + conn.recv(dgram.pkt.as_mut_slice(), dgram.recv_info) + .explain_err(ErrorType::HandshakeError, |_| "receiving dgram failed")?; + debug!("connection {:?} dgram received while establishing", conn_id) + } + Err(e) => { + match e { + TryRecvError::Empty => break 'drain, + TryRecvError::Disconnected => { + // remote already closed channel + // not an issue, HandshakeResponse already processed + error!("connection {:?} channel disconnected", conn_id) + } + } + } + } + } + + assert_eq!( + udp_rx.len(), + 0, + "udp rx channel must be empty when establishing the connection" + ); *resp = Some(HandshakeResponse::Established(handle)); } + // release the lock, next packet will be received on the connection + // the Listener::accept() needs to hold the lock while writing to the udp_tx channel let tx = ConnectionTx { socket_details: socket_details.clone(), @@ -333,13 +367,11 @@ async fn handshake_inner( rx_notify: rx_notify.clone(), tx_notify: tx_notify.clone(), 
- tx_handle: tokio::spawn(tx.start()), + tx_handle: tokio::spawn(tx.start()), // sends HANDSHAKE_DONE Quic frame on established connection drop_connection: drop_connection.clone(), socket: socket.clone(), }; - // send HANDSHAKE_DONE Quic frame on established connection - e_state.tx_notify.notify_waiters(); Ok(Some(e_state)) } From ca159103affadbfb57dfcad1ae69d048f41dc641 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Thu, 30 Jan 2025 18:15:09 +0100 Subject: [PATCH 44/52] logging, cleanups, comments --- pingora-core/src/connectors/http/v3.rs | 1 - pingora-core/src/protocols/http/v3/client.rs | 21 ++++++++----------- pingora-core/src/protocols/http/v3/mod.rs | 4 ++-- pingora-core/src/protocols/http/v3/server.rs | 1 + .../src/protocols/l4/quic/connector.rs | 1 + pingora-core/src/protocols/l4/quic/mod.rs | 14 +++++++------ pingora-core/src/protocols/tls/quic/server.rs | 3 ++- 7 files changed, 23 insertions(+), 22 deletions(-) diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index f48559fbf..473c3978c 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -591,7 +591,6 @@ mod tests { histogram .add(time_taken, 1) .explain_err(InternalError, |_| "failed to add to histogram")?; - req_counter.fetch_add(1, Ordering::SeqCst); } Ok(c) }); diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs index ca545ab5a..489cd083a 100644 --- a/pingora-core/src/protocols/http/v3/client.rs +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -460,23 +460,15 @@ pub(crate) struct Http3Poll { impl Http3Poll { pub(crate) async fn start(mut self) -> Result<()> { - let conn_id = self.conn_io.id.clone(); 'poll: loop { - let res = { + let poll = { let mut qconn = self.conn_io.quic.lock(); - if qconn.is_closed() { - self.idle_close.send_replace(true); - break 'poll Err(Error::explain( - H3Error, - format!("quic connection {:?} is closed stopping", 
conn_id), - )); - } - let mut hconn = self.conn_io.http3.lock(); + // NOTE: poll() drives the entire Quic/HTTP3 connection hconn.poll(&mut qconn) }; - let (stream_id, ev) = match res { + let (stream_id, ev) = match poll { Ok((stream, ev)) => (stream, ev), Err(e) => { let conn_id = self.conn_id().clone(); @@ -503,6 +495,7 @@ impl Http3Poll { if conn_alive { continue 'poll; } else { + self.idle_close.send_replace(true); break 'poll Ok(()); } } @@ -516,7 +509,11 @@ impl Http3Poll { let Some(session) = self.sessions.get_mut(&stream_id) else { return Err(Error::explain( InternalError, - format!("missing session channel for stream id {}", stream_id), + format!( + "connection {:?} missing session channel for stream {}", + self.conn_io.conn_id(), + stream_id + ), )); }; session diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index dd3b8f06b..3e0f86586 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -349,7 +349,7 @@ impl ConnectionIo { self.conn_id(), sessions.len()); } let mut qconn = self.quic.lock(); - // closes connection + // potentially closes connection qconn.on_timeout(); if let Some(timeout) = timeout { debug!("connection {:?} timed out {:?}", self.conn_id(), timeout); @@ -359,7 +359,7 @@ impl ConnectionIo { Ok(true) // signal continue } _ => { - // If an error occurs while processing data, the connection is closed with + // if an error occurs while processing data, the connection is closed with // the appropriate error code, using the transport’s close() method. 
// send the close() event diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index 2f6628842..532a4b4d1 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -89,6 +89,7 @@ pub(crate) async fn handshake( } }; + debug!("connection {:?} http3 handshake finished", conn_io.id); Ok(Http3Connection { _l4stream: io, diff --git a/pingora-core/src/protocols/l4/quic/connector.rs b/pingora-core/src/protocols/l4/quic/connector.rs index fcb4f81e1..60c92aaf7 100644 --- a/pingora-core/src/protocols/l4/quic/connector.rs +++ b/pingora-core/src/protocols/l4/quic/connector.rs @@ -101,6 +101,7 @@ impl ConnectionRx { let mut buf = [0u8; MAX_IPV6_BUF_SIZE]; debug!("connection {:?} rx read", conn_id); 'read: loop { + // TODO: replace with recv.await() let (size, recv_info) = match socket.try_recv_from(&mut buf) { Ok((size, from)) => { trace!( diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index 8f20bca3f..623df4690 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -634,21 +634,23 @@ pub(crate) fn handle_connection_errors( peer_error: Option<&quiche::ConnectionError>, ) -> Result<()> { if let Some(e) = local_error { - error!( + let error_msg = format!( "connection {:?} local error {}", conn_id, - String::from_utf8_lossy(e.reason.as_slice()).to_string() + String::from_utf8_lossy(e.reason.as_slice()) ); - return Err(e).explain_err(ConnectionClosed, |_| "local connection error"); + debug!("{}", error_msg); + return Err(e).explain_err(ConnectionClosed, |_| error_msg); } if let Some(e) = peer_error { - error!( + let error_msg = format!( "connection {:?} peer error {}", conn_id, - String::from_utf8_lossy(e.reason.as_slice()).to_string() + String::from_utf8_lossy(e.reason.as_slice()) ); - return Err(e).explain_err(ConnectionClosed, |_| "peer connection error"); + debug!("{}", 
error_msg); + return Err(e).explain_err(ConnectionClosed, |_| error_msg); } Ok(()) diff --git a/pingora-core/src/protocols/tls/quic/server.rs b/pingora-core/src/protocols/tls/quic/server.rs index 4dc72ad73..a30e0bc98 100644 --- a/pingora-core/src/protocols/tls/quic/server.rs +++ b/pingora-core/src/protocols/tls/quic/server.rs @@ -309,7 +309,8 @@ async fn handshake_inner( }; { - // hold the lock while draining the channel to avoid pkt receiving issues during establishing the handle + // hold the lock while draining the channel + // avoid pkt receiving issues during establishing let mut resp = response.lock(); 'drain: loop { if !udp_rx.is_empty() { From 48cd7bc5e009f4b3043fe7121bc4fcf46de9b374 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Fri, 31 Jan 2025 09:49:02 +0100 Subject: [PATCH 45/52] correctly close server connections, avoid tx_notify misses --- pingora-core/src/protocols/http/v3/client.rs | 11 ++++++----- pingora-core/src/protocols/http/v3/mod.rs | 2 +- pingora-core/src/protocols/http/v3/server.rs | 7 +++++-- pingora-core/src/protocols/l4/quic/mod.rs | 10 ++++++---- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs index 489cd083a..a7c0a56ad 100644 --- a/pingora-core/src/protocols/http/v3/client.rs +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -492,11 +492,12 @@ impl Http3Poll { fn_add_sessions, ) .await?; - if conn_alive { - continue 'poll; - } else { - self.idle_close.send_replace(true); - break 'poll Ok(()); + match conn_alive { + true => continue 'poll, + false => { + self.idle_close.send_replace(true); + break 'poll Ok(()); + } } } }; diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index 3e0f86586..f3d6f0423 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -169,12 +169,12 @@ impl ConnectionIo { debug_assert_eq!(sent_len, 
data.len()); if end { + self.tx_notify.notify_waiters(); trace!( "connection {:?} sent FIN flag for stream {} body", self.conn_id(), stream_id ); - self.tx_notify.notify_waiters(); } Ok(sent_len) diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index 532a4b4d1..18dc8de98 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -359,10 +359,13 @@ impl Http3Session { housekeeping_drop_sessions(&conn_id, sessions, drop_sessions) }; - conn.conn_io + let conn_active = conn.conn_io .error_or_timeout_data_race(e, &mut conn.sessions, fn_drop_sessions, |_| {}) .await?; - continue 'poll; + match conn_active { + true => continue 'poll, + false => return Ok(None), + } } } } diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index 623df4690..b5c3f1a9d 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -141,7 +141,7 @@ impl ConnectionTx { let id = self.connection_id; let mut out = [0u8; MAX_IPV6_BUF_SIZE]; - let mut finished_sending = false; + let mut finished_sending = None; debug!("connection {:?} tx write", id); 'write: loop { let mut continue_write = false; @@ -173,7 +173,8 @@ impl ConnectionTx { Err(e) => { if e == quiche::Error::Done { trace!("connection {:?} send finished", id); - finished_sending = true; + // register notify before socket send to avoid misses under high load + finished_sending = Some(self.tx_notify.notified()); break 'fill; } error!("connection {:?} send error: {:?}", id, e); @@ -239,9 +240,10 @@ impl ConnectionTx { continue 'write; } - if finished_sending { + if let Some(tx_notified) = finished_sending { trace!("connection {:?} finished sending", id); - self.tx_notify.notified().await; + tx_notified.await; + finished_sending = None; continue 'write; } } From 0d601d65a27be05f65c03c9ad2e2b00b04f9b4bd Mon Sep 17 00:00:00 2001 From: Harald Gutmann 
Date: Fri, 31 Jan 2025 10:12:04 +0100 Subject: [PATCH 46/52] notify send on timeout, stop ConnectionTx task on connection is_draining --- pingora-core/src/protocols/http/v3/mod.rs | 6 ++++-- pingora-core/src/protocols/http/v3/server.rs | 3 ++- pingora-core/src/protocols/l4/quic/mod.rs | 10 ++++++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index f3d6f0423..109bfdb0c 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -345,14 +345,16 @@ impl ConnectionIo { } => { drop_sessions(sessions); if !sessions.is_empty() { - warn!("connection {:?} timed out with {} open sessions", + debug!("connection {:?} timed out with {} open sessions", self.conn_id(), sessions.len()); } let mut qconn = self.quic.lock(); // potentially closes connection qconn.on_timeout(); + self.tx_notify.notify_waiters(); + if let Some(timeout) = timeout { - debug!("connection {:?} timed out {:?}", self.conn_id(), timeout); + trace!("connection {:?} timed out {:?}", self.conn_id(), timeout); } } } diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index 18dc8de98..e177bfffa 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -359,7 +359,8 @@ impl Http3Session { housekeeping_drop_sessions(&conn_id, sessions, drop_sessions) }; - let conn_active = conn.conn_io + let conn_active = conn + .conn_io .error_or_timeout_data_race(e, &mut conn.sessions, fn_drop_sessions, |_| {}) .await?; match conn_active { diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index b5c3f1a9d..81295629e 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -162,6 +162,11 @@ impl ConnectionTx { 'fill: while total_write < max_send_burst { let send = { let mut conn = 
self.connection.lock(); + if conn.is_draining() { + // Once is_draining() returns true, it is no longer necessary + // to call send() and all calls will return Done. + break 'write Ok(()); + } conn.send(&mut out[total_write..max_send_burst]) }; @@ -178,10 +183,7 @@ impl ConnectionTx { break 'fill; } error!("connection {:?} send error: {:?}", id, e); - /* TODO: close connection - let mut conn = self.connection.lock(); - conn.close(false, 0x1, b"fail").ok(); - */ + // TODO: close connection needed break 'write Err(Error::explain( ErrorType::WriteError, format!( From 1dd78d1745b1eb6109f0c0d6dfe3332ae0053eec Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Fri, 31 Jan 2025 11:43:36 +0100 Subject: [PATCH 47/52] run session housekeeping before polling h3 connection remove duplicated drop, tasks are stopped on the Connection drop --- pingora-core/src/connectors/http/v3.rs | 6 +++-- pingora-core/src/protocols/http/v3/client.rs | 27 ++++++------------- pingora-core/src/protocols/http/v3/mod.rs | 27 +++++-------------- pingora-core/src/protocols/http/v3/server.rs | 17 ++++++------ .../src/protocols/l4/quic/connector.rs | 13 --------- 5 files changed, 27 insertions(+), 63 deletions(-) diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index 473c3978c..5a71db5cb 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -609,12 +609,13 @@ mod tests { let req_counter = req_counter.load(Ordering::SeqCst); let failed_req_counter = failed_req_counter.load(Ordering::SeqCst); - //assert_eq!(req_counter, ITER_SIZE * ITER_SIZE * 2); info!("successful requests {}", req_counter); info!("failed requests {}", failed_req_counter); info!("total duration {} milli seconds", diff.as_millis()); + assert_eq!(req_counter, ITER_SIZE * ITER_SIZE * 2); + let histogram = histogram.load(); print_histogram(histogram)?; Ok(()) @@ -665,11 +666,12 @@ mod tests { } let req_counter = req_counter.load(Ordering::SeqCst); - 
assert_eq!(req_counter, ITER_SIZE.pow(3)); info!("successful requests {}", req_counter); info!("total duration {} milli seconds", diff.as_millis()); + assert_eq!(req_counter, ITER_SIZE.pow(3)); + let histogram = histogram.load(); print_histogram(histogram)?; Ok(()) diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs index a7c0a56ad..e80532d1e 100644 --- a/pingora-core/src/protocols/http/v3/client.rs +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -471,26 +471,9 @@ impl Http3Poll { let (stream_id, ev) = match poll { Ok((stream, ev)) => (stream, ev), Err(e) => { - let conn_id = self.conn_id().clone(); - - let drop_sessions = &self.drop_sessions.clone(); - let fn_drop_sessions = |sessions: &mut StreamIdHashMap>| { - housekeeping_drop_sessions(&conn_id, sessions, drop_sessions) - }; - - let add_sessions = &self.add_sessions.clone(); - let fn_add_sessions = |sessions: &mut StreamIdHashMap>| { - housekeeping_add_sessions(&conn_id, sessions, add_sessions) - }; - let conn_alive = self .conn_io - .error_or_timeout_data_race( - e, - &mut self.sessions, - fn_drop_sessions, - fn_add_sessions, - ) + .error_or_timeout_data_race(e, &self.sessions) .await?; match conn_alive { true => continue 'poll, @@ -523,7 +506,13 @@ impl Http3Poll { session .send(ev) .await - .explain_err(H3Error, |_| "failed to forward h3 event to session")? 
+ .explain_err(H3Error, |_| "failed to forward h3 event to session")?; + + housekeeping_drop_sessions( + &self.conn_id().clone(), + &mut self.sessions, + &self.drop_sessions, + ); } } diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index 109bfdb0c..7faa60842 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -57,7 +57,7 @@ pub(crate) struct ConnectionIo { /// the underlying Quic connection quic: Arc>, /// the actual HTTP 3 connection - http3: Arc>, + http3: Arc>, /// receive notification on Quic recv /// @@ -99,15 +99,12 @@ impl ConnectionIo { pub(crate) fn is_shutting_down(&self) -> bool { let qconn = self.quic.lock(); - qconn.is_draining() + qconn.is_draining() && !qconn.is_closed() } pub(crate) fn more_streams_available(&self) -> bool { let qconn = self.quic.lock(); - qconn.is_established() - && !qconn.is_closed() - && !qconn.is_draining() - && qconn.peer_streams_left_bidi() > 0 + qconn.is_established() && qconn.peer_streams_left_bidi() > 0 } fn capacity( @@ -276,17 +273,11 @@ impl ConnectionIo { self.tx_notify.notify_waiters() } - async fn error_or_timeout_data_race( + async fn error_or_timeout_data_race( &self, error: h3::Error, - sessions: &mut StreamIdHashMap>, - mut drop_sessions: D, - mut add_sessions: A, - ) -> Result - where - D: FnMut(&mut StreamIdHashMap>), - A: FnMut(&mut StreamIdHashMap>), - { + sessions: &StreamIdHashMap>, + ) -> Result { // register before housekeeping to avoid notify misses in high-load scenarios let data_future = self.rx_notify.notified(); @@ -295,9 +286,6 @@ impl ConnectionIo { debug!("H3 connection {:?} no events available", self.conn_id()); // TODO: in case PriorityUpdate was triggered call take_priority_update() here - add_sessions(sessions); - drop_sessions(sessions); - let timeout; { let qconn = self.quic.lock(); @@ -305,7 +293,7 @@ impl ConnectionIo { qconn.is_closed() || !(qconn.is_established() || 
qconn.is_in_early_data()); if is_closed { if !sessions.is_empty() { - warn!( + debug!( "H3 connection {:?} closed with open {} sessions", self.conn_id(), sessions.len() @@ -343,7 +331,6 @@ impl ConnectionIo { tokio::time::sleep(Duration::MAX).await } } => { - drop_sessions(sessions); if !sessions.is_empty() { debug!("connection {:?} timed out with {} open sessions", self.conn_id(), sessions.len()); diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index e177bfffa..6acc1ce15 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -124,7 +124,7 @@ impl Drop for Http3Connection { fn drop(&mut self) { let mut drop_connections = self.drop_connections.lock(); drop_connections.push_back(self.conn_id().clone()); - debug!("drop connection {:?}", self.conn_id()); + debug!("Http3Connection drop {:?}", self.conn_id()); } } @@ -352,16 +352,9 @@ impl Http3Session { } } Err(e) => { - let conn_id = conn.conn_id().clone(); - let drop_sessions = &conn.drop_sessions.clone(); - - let fn_drop_sessions = |sessions: &mut StreamIdHashMap>| { - housekeeping_drop_sessions(&conn_id, sessions, drop_sessions) - }; - let conn_active = conn .conn_io - .error_or_timeout_data_race(e, &mut conn.sessions, fn_drop_sessions, |_| {}) + .error_or_timeout_data_race(e, &conn.sessions) .await?; match conn_active { true => continue 'poll, @@ -369,6 +362,12 @@ impl Http3Session { } } } + + housekeeping_drop_sessions( + &conn.conn_id().clone(), + &mut conn.sessions, + &conn.drop_sessions, + ); } } diff --git a/pingora-core/src/protocols/l4/quic/connector.rs b/pingora-core/src/protocols/l4/quic/connector.rs index 60c92aaf7..2c0c611c1 100644 --- a/pingora-core/src/protocols/l4/quic/connector.rs +++ b/pingora-core/src/protocols/l4/quic/connector.rs @@ -156,16 +156,3 @@ impl ConnectionRx { } } } - -impl Drop for EstablishedState { - fn drop(&mut self) { - if !self.rx_handle.is_finished() { - 
self.rx_handle.abort(); - debug!("connection {:?} stopped rx task", self.connection_id) - } - if !self.tx_handle.is_finished() { - self.tx_handle.abort(); - debug!("connection {:?} stopped rx task", self.connection_id) - } - } -} From c18fe344eb1efa5286c36f84cd6e8b95e046f4ba Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Mon, 3 Feb 2025 16:27:30 +0100 Subject: [PATCH 48/52] add a housekeeping tick, to avoid stuck sessions avoid unused allocations, remove log --- .../src/protocols/l4/quic/listener.rs | 43 ++++++++++++------- pingora-core/src/protocols/l4/quic/mod.rs | 12 +----- pingora-core/src/protocols/l4/stream.rs | 6 +-- 3 files changed, 30 insertions(+), 31 deletions(-) diff --git a/pingora-core/src/protocols/l4/quic/listener.rs b/pingora-core/src/protocols/l4/quic/listener.rs index d86b0f777..dd16c1074 100644 --- a/pingora-core/src/protocols/l4/quic/listener.rs +++ b/pingora-core/src/protocols/l4/quic/listener.rs @@ -31,6 +31,7 @@ use std::io::ErrorKind; use std::net::SocketAddr; use std::os::fd::{AsRawFd, RawFd}; use std::sync::Arc; +use std::time::Duration; use std::{io, mem}; use tokio::net::UdpSocket; use tokio::sync::mpsc::{channel, Receiver, Sender}; @@ -147,23 +148,17 @@ impl Listener { debug!("endpoint rx loop"); 'read: loop { // receive from network and parse Quic header - let (size, from) = self.socket_details.io.recv_from(&mut rx_buf).await?; - - // cleanup connections - { - let mut drop_conn = self.drop_connections.lock(); - while let Some(drop_id) = drop_conn.pop_front() { - match self.connections.remove(&drop_id) { - None => warn!( - "failed to remove connection handle {:?} from connections", - drop_id - ), - Some(_) => { - debug!("removed connection handle {:?} from connections", drop_id) - } - } + let (size, from) = tokio::select! { + biased; + res = self.socket_details.io.recv_from(&mut rx_buf) => { res? 
}, + _housekeeping_tick = tokio::time::sleep(Duration::from_millis(100)) => { + // avoid stuck connections when traffic stops + self.housekeeping_connections_drop(); + continue; } - } + }; + + self.housekeeping_connections_drop(); // parse the Quic packet's header let header = match Header::from_slice(rx_buf[..size].as_mut(), quiche::MAX_CONN_ID_LEN) @@ -320,6 +315,22 @@ impl Listener { } } + fn housekeeping_connections_drop(&mut self) { + // cleanup connections + let mut drop_conn = self.drop_connections.lock(); + while let Some(drop_id) = drop_conn.pop_front() { + match self.connections.remove(&drop_id) { + None => warn!( + "failed to remove connection handle {:?} from connections", + drop_id + ), + Some(_) => { + debug!("removed connection handle {:?} from connections", drop_id) + } + } + } + } + fn recv_connection( conn_id: &ConnectionId<'_>, conn: &Mutex, diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index 81295629e..ce1b6ce23 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -464,16 +464,8 @@ impl Connection { ) } match self { - Connection::IncomingHandshake(s) => { - if !s.udp_rx.is_empty() { - error!( - "connection {:?} established udp_rx={}", - state.connection_id, - s.udp_rx.len() - ); - } else { - debug!("connection {:?} established", state.connection_id); - } + Connection::IncomingHandshake(_) => { + debug!("connection {:?} established", state.connection_id); let _ = mem::replace(self, Connection::IncomingEstablished(state)); Ok(()) } diff --git a/pingora-core/src/protocols/l4/stream.rs b/pingora-core/src/protocols/l4/stream.rs index 4c4d14a88..ba7ad8224 100644 --- a/pingora-core/src/protocols/l4/stream.rs +++ b/pingora-core/src/protocols/l4/stream.rs @@ -432,11 +432,7 @@ impl Stream { impl From for Stream { fn from(s: Connection) -> Self { Stream { - stream: BufStream::with_capacity( - BUF_READ_SIZE, - BUF_WRITE_SIZE, - 
RawStreamWrapper::new(RawStream::Quic(s)), - ), + stream: BufStream::with_capacity(0, 0, RawStreamWrapper::new(RawStream::Quic(s))), rewind_read_buf: Vec::new(), buffer_write: true, established_ts: SystemTime::now(), From 57a3a6099da89d47f0897b15183e0b7e2baa110a Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Mon, 3 Feb 2025 16:31:16 +0100 Subject: [PATCH 49/52] move TransportConnectors TLS context into Options to avoid duplicated CA initializations fix more_streams_allowed detection --- pingora-core/src/connectors/http/v3.rs | 22 +++++- pingora-core/src/connectors/l4.rs | 5 +- pingora-core/src/connectors/mod.rs | 78 +++++++++++++++---- pingora-core/src/protocols/tls/quic/server.rs | 2 +- pingora-core/tests/test_basic.rs | 1 - 5 files changed, 82 insertions(+), 26 deletions(-) diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index 5a71db5cb..89ab03db8 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -154,7 +154,7 @@ impl Connector { // connection offload is handled by the [TransportConnector] Self { - transport: TransportConnector::new(options), + transport: TransportConnector::new_http3(options), idle_pool: Arc::new(ConnectionPool::new(pool_size)), in_use_pool: InUsePool::new(), } @@ -299,7 +299,10 @@ impl ConnectionRef { } pub fn more_streams_allowed(&self) -> bool { - self.conn_io().more_streams_available() + let current = self.0.current_streams.load(Ordering::Relaxed); + !self.is_shutting_down() + && self.0.max_streams > current + && self.conn_io().more_streams_available() } } @@ -387,7 +390,7 @@ mod tests { use textplots::{Chart, Plot, Shape}; use tokio::task::JoinSet; - const ITER_SIZE: usize = 42; + const ITER_SIZE: usize = 16; #[tokio::test] async fn test_listener_connector_quic_http3() -> Result<()> { @@ -535,6 +538,7 @@ mod tests { .explain_err(InternalError, |_| "failed to add to histogram")?; req_counter += 1; } + assert_eq!(req_counter, ITER_SIZE.pow(3)); 
let diff = timing.elapsed(); @@ -588,6 +592,16 @@ mod tests { }; let time_taken = timer.elapsed().as_micros() as u64; + + match session { + HttpSession::H1(_) | HttpSession::H2(_) => {} + HttpSession::H3(h3_session) => connector.release_http_session( + h3_session, + &peer, + Some(Duration::from_secs(1)), + ), + } + histogram .add(time_taken, 1) .explain_err(InternalError, |_| "failed to add to histogram")?; @@ -614,7 +628,7 @@ mod tests { info!("failed requests {}", failed_req_counter); info!("total duration {} milli seconds", diff.as_millis()); - assert_eq!(req_counter, ITER_SIZE * ITER_SIZE * 2); + assert_eq!(req_counter, ITER_SIZE.pow(3)); let histogram = histogram.load(); print_histogram(histogram)?; diff --git a/pingora-core/src/connectors/l4.rs b/pingora-core/src/connectors/l4.rs index 8ab3c724e..71767609d 100644 --- a/pingora-core/src/connectors/l4.rs +++ b/pingora-core/src/connectors/l4.rs @@ -680,9 +680,8 @@ mod quic_tests { let mut pre_handshake_stream = connect(&peer, None).await?; assert!(pre_handshake_stream.quic_connection_state().is_some()); - let tls_connector = tls::Connector::new(None); - let quic_connector = tls::quic::Connector::new(None); - let mut stream = do_connect(&peer, None, None, &tls_connector, &quic_connector).await?; + let tls_connector = tls::quic::Connector::new(None); + let mut stream = do_connect(&peer, None, None, None, Some(&tls_connector)).await?; assert!(stream.quic_connection_state().is_some()); let connection = stream.quic_connection_state().unwrap(); diff --git a/pingora-core/src/connectors/mod.rs b/pingora-core/src/connectors/mod.rs index f6ff28cfe..d3ae30ec9 100644 --- a/pingora-core/src/connectors/mod.rs +++ b/pingora-core/src/connectors/mod.rs @@ -129,8 +129,8 @@ impl ConnectorOptions { /// [TransportConnector] provides APIs to connect to servers via TCP or TLS with connection reuse pub struct TransportConnector { - tls_ctx: tls::Connector, - quic_tls_ctx: tls::quic::Connector, + tls_ctx: Option, + quic_tls_ctx: Option, 
connection_pool: Arc>>>, offload: Option, bind_to_v4: Vec, @@ -156,8 +156,32 @@ impl TransportConnector { .as_ref() .map_or_else(Vec::new, |o| o.bind_to_v6.clone()); TransportConnector { - tls_ctx: tls::Connector::new(options.clone()), - quic_tls_ctx: tls::quic::Connector::new(options), + tls_ctx: Some(tls::Connector::new(options.clone())), + quic_tls_ctx: None, + connection_pool: Arc::new(ConnectionPool::new(pool_size)), + offload: offload.map(|v| OffloadRuntime::new(v.0, v.1)), + bind_to_v4, + bind_to_v6, + preferred_http_version: PreferredHttpVersion::new(), + } + } + + pub fn new_http3(mut options: Option) -> Self { + let pool_size = options + .as_ref() + .map_or(DEFAULT_POOL_SIZE, |c| c.keepalive_pool_size); + // Take the offloading setting there because this layer has implement offloading, + // so no need for stacks at lower layer to offload again. + let offload = options.as_mut().and_then(|o| o.offload_threadpool.take()); + let bind_to_v4 = options + .as_ref() + .map_or_else(Vec::new, |o| o.bind_to_v4.clone()); + let bind_to_v6 = options + .as_ref() + .map_or_else(Vec::new, |o| o.bind_to_v6.clone()); + TransportConnector { + tls_ctx: None, + quic_tls_ctx: Some(tls::quic::Connector::new(options.clone())), connection_pool: Arc::new(ConnectionPool::new(pool_size)), offload: offload.map(|v| OffloadRuntime::new(v.0, v.1)), bind_to_v4, @@ -181,7 +205,14 @@ impl TransportConnector { let tls_ctx = self.tls_ctx.clone(); let quic_tls_ctx = self.quic_tls_ctx.clone(); rt.spawn(async move { - do_connect(&peer, bind_to, alpn_override, &tls_ctx, &quic_tls_ctx).await + do_connect( + &peer, + bind_to, + alpn_override, + tls_ctx.as_ref(), + quic_tls_ctx.as_ref(), + ) + .await }) .await .or_err(InternalError, "offload runtime failure")?? @@ -190,8 +221,8 @@ impl TransportConnector { peer, bind_to, alpn_override, - &self.tls_ctx, - &self.quic_tls_ctx, + self.tls_ctx.as_ref(), + self.quic_tls_ctx.as_ref(), ) .await? 
}; @@ -310,8 +341,8 @@ async fn do_connect( peer: &P, bind_to: Option, alpn_override: Option, - tls_ctx: &tls::Connector, - quic_tls_ctx: &tls::quic::Connector, + tls_ctx: Option<&tls::Connector>, + quic_tls_ctx: Option<&tls::quic::Connector>, ) -> Result { // Create the future that does the connections, but don't evaluate it until // we decide if we need a timeout or not @@ -334,8 +365,8 @@ async fn do_connect_inner( peer: &P, bind_to: Option, alpn_override: Option, - tls_ctx: &tls::Connector, - quic_tls_ctx: &tls::quic::Connector, + tls_ctx: Option<&tls::Connector>, + quic_tls_ctx: Option<&tls::quic::Connector>, ) -> Result { let stream = l4_connect(peer, bind_to).await?; if peer.tls() { @@ -347,10 +378,24 @@ async fn do_connect_inner( )); } + let Some(quic_tls_ctx) = quic_tls_ctx else { + return Err(Error::explain( + HandshakeError, + "usage of HTTP3 requires a Quic TLS context", + )); + }; + let quic_stream = quic_handshake(stream, peer, alpn_override, &quic_tls_ctx.quic_http3).await?; Ok(Box::new(quic_stream)) } else { + let Some(tls_ctx) = tls_ctx else { + return Err(Error::explain( + HandshakeError, + "usage of HTTP1/2 with TLS enabled requires a TLS context", + )); + }; + let tls_stream = tls::connect(stream, peer, alpn_override, &tls_ctx.ctx).await?; Ok(Box::new(tls_stream)) } @@ -421,7 +466,6 @@ mod tests { use tls::Connector; use super::*; - use crate::connectors::tls::quic; use crate::upstreams::peer::BasicPeer; use tokio::io::AsyncWriteExt; #[cfg(unix)] @@ -532,8 +576,7 @@ mod tests { /// the decomposed error type and message async fn get_do_connect_failure_with_peer(peer: &BasicPeer) -> (ErrorType, String) { let tls_connector = Connector::new(None); - let quic_connector = quic::Connector::new(None); - let stream = do_connect(peer, None, None, &tls_connector, &quic_connector).await; + let stream = do_connect(peer, None, None, Some(&tls_connector), None).await; match stream { Ok(_) => panic!("should throw an error"), Err(e) => ( @@ -593,9 +636,10 @@ 
pub(crate) mod quic_tests { use std::time::Duration; pub(crate) async fn quic_listener_peer() -> Result<(JoinHandle<()>, HttpPeer)> { - env_logger::builder() + let _ = env_logger::builder() + .is_test(true) .format_timestamp(Some(env_logger::TimestampPrecision::Nanos)) - .init(); + .try_init(); info!("Starting listener..."); let port = 6147u16; @@ -611,7 +655,7 @@ pub(crate) mod quic_tests { let mut echo_service_http = Service::with_listeners("Echo Service HTTP".to_string(), listeners, EchoApp); - echo_service_http.threads = Some(1); + echo_service_http.threads = Some(4); my_server.add_service(echo_service_http); my_server.run_forever(); diff --git a/pingora-core/src/protocols/tls/quic/server.rs b/pingora-core/src/protocols/tls/quic/server.rs index a30e0bc98..1b32ee8ea 100644 --- a/pingora-core/src/protocols/tls/quic/server.rs +++ b/pingora-core/src/protocols/tls/quic/server.rs @@ -314,7 +314,7 @@ async fn handshake_inner( let mut resp = response.lock(); 'drain: loop { if !udp_rx.is_empty() { - error!( + debug!( "connection {:?} established udp_rx {}", conn_id, udp_rx.len() diff --git a/pingora-core/tests/test_basic.rs b/pingora-core/tests/test_basic.rs index 054c5cb90..a0bfc1946 100644 --- a/pingora-core/tests/test_basic.rs +++ b/pingora-core/tests/test_basic.rs @@ -154,7 +154,6 @@ async fn test_listener_quic_http3() -> Result<()> { assert_eq!(content_type, &b"text/html".to_vec()); assert_eq!(content_length, &body.len().to_string().as_bytes().to_vec()); assert_eq!(resp_body[0], body.as_slice().to_vec()); - tokio::time::sleep(Duration::MAX).await; Ok(()) } From 5785b86b1290f3cda3fd52c82a8ca9f3221f2d91 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Mon, 3 Feb 2025 17:58:36 +0100 Subject: [PATCH 50/52] merge fixes & update license headers for 2025 --- pingora-core/src/connectors/http/v3.rs | 2 +- pingora-core/src/connectors/l4.rs | 162 ++++++++---------- pingora-core/src/connectors/tls/quic/mod.rs | 6 +- pingora-core/src/protocols/http/server.rs | 1 + 
pingora-core/src/protocols/http/v3/client.rs | 2 +- pingora-core/src/protocols/http/v3/mod.rs | 2 +- pingora-core/src/protocols/http/v3/server.rs | 2 +- .../src/protocols/l4/quic/connector.rs | 2 +- .../src/protocols/l4/quic/listener.rs | 2 +- pingora-core/src/protocols/l4/quic/mod.rs | 2 +- pingora-core/src/protocols/tls/quic/client.rs | 2 +- pingora-core/src/protocols/tls/quic/mod.rs | 2 +- pingora-core/src/protocols/tls/quic/server.rs | 2 +- pingora-core/src/protocols/tls/quic/stream.rs | 2 +- pingora-core/src/tls/mod.rs | 2 +- 15 files changed, 86 insertions(+), 107 deletions(-) diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index 89ab03db8..49e24b9df 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -1,4 +1,4 @@ -// Copyright 2024 Cloudflare, Inc. +// Copyright 2025 Cloudflare, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pingora-core/src/connectors/l4.rs b/pingora-core/src/connectors/l4.rs index 541ae8128..251e05408 100644 --- a/pingora-core/src/connectors/l4.rs +++ b/pingora-core/src/connectors/l4.rs @@ -1,4 +1,4 @@ -// Copyright 2024 Cloudflare, Inc. +// Copyright 2025 Cloudflare, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -108,6 +108,7 @@ where if let Some(custom_l4) = peer.get_peer_options().and_then(|o| o.custom_l4.as_ref()) { custom_l4.connect(peer_addr).await? } else if peer.udp_http3() { + // create UDP sockets inner_udp_connect(peer, &bind_to, peer_addr).await? 
} else { // create TCP sockets @@ -156,103 +157,78 @@ where #[cfg(windows)] let raw = socket.as_raw_socket(); - if peer.tcp_fast_open() { - set_tcp_fastopen_connect(raw)?; - } - if let Some(recv_buf) = peer.tcp_recv_buf() { - debug!("Setting recv buf size"); - set_recv_buf(raw, recv_buf)?; - } - if let Some(dscp) = peer.dscp() { - debug!("Setting dscp"); - set_dscp(raw, dscp)?; - } - - if let Some(tweak_hook) = peer - .get_peer_options() - .and_then(|o| o.upstream_tcp_sock_tweak_hook.clone()) - { - tweak_hook(socket)?; - } - - Ok(()) - }); - let conn_res = match peer.connection_timeout() { - Some(t) => pingora_timeout::timeout(t, connect_future) - .await - .explain_err(ConnectTimedout, |_| { - format!("timeout {t:?} connecting to server {peer}") - })?, - None => connect_future.await, - }; - match conn_res { - Ok(socket) => { - debug!("connected to new server: {}", peer.address()); - Ok(socket.into()) - } - Err(e) => { - let c = format!("Fail to connect to {peer}"); - match e.etype() { - SocketError | BindError => Error::e_because(InternalError, c, e), - _ => Err(e.more_context(c)), - } - } - } + if peer.tcp_fast_open() { + set_tcp_fastopen_connect(raw)?; } - #[cfg(unix)] - SocketAddr::Unix(addr) => { - let connect_future = connect_uds( - addr.as_pathname() - .expect("non-pathname unix sockets not supported as peer"), - ); - let conn_res = match peer.connection_timeout() { - Some(t) => pingora_timeout::timeout(t, connect_future) - .await - .explain_err(ConnectTimedout, |_| { - format!("timeout {t:?} connecting to server {peer}") - })?, - None => connect_future.await, - }; - match conn_res { - Ok(socket) => { - debug!("connected to new server: {}", peer.address()); - Ok(socket.into()) - } - Err(e) => { - let c = format!("Fail to connect to {peer}"); - match e.etype() { - SocketError | BindError => Error::e_because(InternalError, c, e), - _ => Err(e.more_context(c)), - } - } - } + if let Some(recv_buf) = peer.tcp_recv_buf() { + debug!("Setting recv buf size"); + 
set_recv_buf(raw, recv_buf)?; + } + if let Some(dscp) = peer.dscp() { + debug!("Setting dscp"); + set_dscp(raw, dscp)?; } - }? - }; - let tracer = peer.get_tracer(); - if let Some(t) = tracer { - t.0.on_connected(); - stream.tracer = Some(t); - } + if let Some(tweak_hook) = peer + .get_peer_options() + .and_then(|o| o.upstream_tcp_sock_tweak_hook.clone()) + { + tweak_hook(socket)?; + } - // settings applied based on stream type - if let Some(ka) = peer.tcp_keepalive() { - stream.set_keepalive(ka)?; + Ok(()) + }); + let conn_res = match peer.connection_timeout() { + Some(t) => pingora_timeout::timeout(t, connect_future) + .await + .explain_err(ConnectTimedout, |_| { + format!("timeout {t:?} connecting to server {peer}") + })?, + None => connect_future.await, + }; + match conn_res { + Ok(socket) => { + debug!("connected to new server: {}", peer.address()); + Ok(socket.into()) + } + Err(e) => { + let c = format!("Fail to connect to {peer}"); + match e.etype() { + SocketError | BindError => Error::e_because(InternalError, c, e), + _ => Err(e.more_context(c)), + } + } + } + } + #[cfg(unix)] + SocketAddr::Unix(addr) => { + let connect_future = connect_uds( + addr.as_pathname() + .expect("non-pathname unix sockets not supported as peer"), + ); + let conn_res = match peer.connection_timeout() { + Some(t) => pingora_timeout::timeout(t, connect_future) + .await + .explain_err(ConnectTimedout, |_| { + format!("timeout {t:?} connecting to server {peer}") + })?, + None => connect_future.await, + }; + match conn_res { + Ok(socket) => { + debug!("connected to new server: {}", peer.address()); + Ok(socket.into()) + } + Err(e) => { + let c = format!("Fail to connect to {peer}"); + match e.etype() { + SocketError | BindError => Error::e_because(InternalError, c, e), + _ => Err(e.more_context(c)), + } + } + } + } } - stream.set_nodelay()?; - - #[cfg(unix)] - let digest = SocketDigest::from_raw_fd(stream.as_raw_fd()); - #[cfg(windows)] - let digest = 
SocketDigest::from_raw_socket(stream.as_raw_socket()); - digest - .peer_addr - .set(Some(peer_addr.clone())) - .expect("newly created OnceCell must be empty"); - stream.set_socket_digest(digest); - - Ok(stream) } /// create [`tokio::net::UdpSocket`] and a Quic [Connection](`crate::protocols::l4::quic::Connection::OutgoingHandshake`) diff --git a/pingora-core/src/connectors/tls/quic/mod.rs b/pingora-core/src/connectors/tls/quic/mod.rs index 1cc24463a..ce82c8329 100644 --- a/pingora-core/src/connectors/tls/quic/mod.rs +++ b/pingora-core/src/connectors/tls/quic/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2024 Cloudflare, Inc. +// Copyright 2025 Cloudflare, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -68,7 +68,9 @@ static INIT_CA_ENV: Once = Once::new(); fn init_ssl_cert_env_vars() { // this sets env vars to pick up the root certs // it is universal across openssl and boringssl - INIT_CA_ENV.call_once(openssl_probe::init_ssl_cert_env_vars); + // safety: although impossible to prove safe we assume it's safe since the call is + // wrapped in a call_once and it's unlikely other threads are reading these vars + INIT_CA_ENV.call_once(|| unsafe { openssl_probe::init_openssl_env_vars() }); } /// enables rustls & quic-boringssl usage diff --git a/pingora-core/src/protocols/http/server.rs b/pingora-core/src/protocols/http/server.rs index 65bf67b92..bbef83131 100644 --- a/pingora-core/src/protocols/http/server.rs +++ b/pingora-core/src/protocols/http/server.rs @@ -232,6 +232,7 @@ impl Session { match self { Self::H1(s) => s.set_read_timeout(timeout), Self::H2(_) => {} + Self::H3(_) => {} } } diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs index e80532d1e..34a5bddbf 100644 --- a/pingora-core/src/protocols/http/v3/client.rs +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -1,4 +1,4 @@ -// Copyright 2024 Cloudflare, Inc. 
+// Copyright 2025 Cloudflare, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index 7faa60842..6f5cd30ec 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2024 Cloudflare, Inc. +// Copyright 2025 Cloudflare, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pingora-core/src/protocols/http/v3/server.rs b/pingora-core/src/protocols/http/v3/server.rs index 6acc1ce15..b064f440b 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -1,4 +1,4 @@ -// Copyright 2024 Cloudflare, Inc. +// Copyright 2025 Cloudflare, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pingora-core/src/protocols/l4/quic/connector.rs b/pingora-core/src/protocols/l4/quic/connector.rs index 2c0c611c1..80980be84 100644 --- a/pingora-core/src/protocols/l4/quic/connector.rs +++ b/pingora-core/src/protocols/l4/quic/connector.rs @@ -1,4 +1,4 @@ -// Copyright 2024 Cloudflare, Inc. +// Copyright 2025 Cloudflare, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pingora-core/src/protocols/l4/quic/listener.rs b/pingora-core/src/protocols/l4/quic/listener.rs index dd16c1074..4cc8139ca 100644 --- a/pingora-core/src/protocols/l4/quic/listener.rs +++ b/pingora-core/src/protocols/l4/quic/listener.rs @@ -1,4 +1,4 @@ -// Copyright 2024 Cloudflare, Inc. +// Copyright 2025 Cloudflare, Inc. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index ce1b6ce23..dfbd64daa 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2024 Cloudflare, Inc. +// Copyright 2025 Cloudflare, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pingora-core/src/protocols/tls/quic/client.rs b/pingora-core/src/protocols/tls/quic/client.rs index 029a0e76a..f31c35739 100644 --- a/pingora-core/src/protocols/tls/quic/client.rs +++ b/pingora-core/src/protocols/tls/quic/client.rs @@ -1,4 +1,4 @@ -// Copyright 2024 Cloudflare, Inc. +// Copyright 2025 Cloudflare, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pingora-core/src/protocols/tls/quic/mod.rs b/pingora-core/src/protocols/tls/quic/mod.rs index 9688d457a..28b17386e 100644 --- a/pingora-core/src/protocols/tls/quic/mod.rs +++ b/pingora-core/src/protocols/tls/quic/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2024 Cloudflare, Inc. +// Copyright 2025 Cloudflare, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pingora-core/src/protocols/tls/quic/server.rs b/pingora-core/src/protocols/tls/quic/server.rs index 1b32ee8ea..b5aa04154 100644 --- a/pingora-core/src/protocols/tls/quic/server.rs +++ b/pingora-core/src/protocols/tls/quic/server.rs @@ -1,4 +1,4 @@ -// Copyright 2024 Cloudflare, Inc. +// Copyright 2025 Cloudflare, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/pingora-core/src/protocols/tls/quic/stream.rs b/pingora-core/src/protocols/tls/quic/stream.rs index d53a9ea64..2949f35b2 100644 --- a/pingora-core/src/protocols/tls/quic/stream.rs +++ b/pingora-core/src/protocols/tls/quic/stream.rs @@ -1,4 +1,4 @@ -// Copyright 2024 Cloudflare, Inc. +// Copyright 2025 Cloudflare, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pingora-core/src/tls/mod.rs b/pingora-core/src/tls/mod.rs index 277b5b409..b97d7a315 100644 --- a/pingora-core/src/tls/mod.rs +++ b/pingora-core/src/tls/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2024 Cloudflare, Inc. +// Copyright 2025 Cloudflare, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. From 5501789cd84158be0e9d8d752df51b8ad448ddcd Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Tue, 4 Feb 2025 13:15:34 +0100 Subject: [PATCH 51/52] separate test listener ports --- pingora-core/src/connectors/http/v3.rs | 10 +++++----- pingora-core/src/connectors/l4.rs | 2 +- pingora-core/src/connectors/mod.rs | 5 ++--- pingora-core/src/protocols/l4/quic/connector.rs | 2 +- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pingora-core/src/connectors/http/v3.rs b/pingora-core/src/connectors/http/v3.rs index 49e24b9df..763e7ad46 100644 --- a/pingora-core/src/connectors/http/v3.rs +++ b/pingora-core/src/connectors/http/v3.rs @@ -390,11 +390,11 @@ mod tests { use textplots::{Chart, Plot, Shape}; use tokio::task::JoinSet; - const ITER_SIZE: usize = 16; + const ITER_SIZE: usize = 8; #[tokio::test] async fn test_listener_connector_quic_http3() -> Result<()> { - let (_server_handle, peer) = quic_listener_peer().await?; + let (_server_handle, peer) = quic_listener_peer(6180).await?; let connector = Connector::new(None); let mut session = connector.new_http_session(&peer).await?; @@ -518,7 +518,7 @@ mod 
tests { #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn test_connector_sequential_quic_http3() -> Result<()> { - let (_server_handle, mut peer) = quic_listener_peer().await?; + let (_server_handle, mut peer) = quic_listener_peer(6181).await?; peer.options.max_h3_streams = 100; let mut req_counter = 0usize; @@ -550,7 +550,7 @@ mod tests { #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn test_connector_parallel_quic_http3() -> Result<()> { - let (_server_handle, mut peer) = quic_listener_peer().await?; + let (_server_handle, mut peer) = quic_listener_peer(6182).await?; peer.options.max_h3_streams = 10; let req_counter = Arc::new(AtomicUsize::new(0)); @@ -637,7 +637,7 @@ mod tests { #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn test_connector_parallel_sequential_quic_http3() -> Result<()> { - let (_server_handle, mut peer) = quic_listener_peer().await?; + let (_server_handle, mut peer) = quic_listener_peer(6183).await?; peer.options.max_h3_streams = 10; let req_counter = Arc::new(AtomicUsize::new(0)); diff --git a/pingora-core/src/connectors/l4.rs b/pingora-core/src/connectors/l4.rs index 251e05408..d4081629d 100644 --- a/pingora-core/src/connectors/l4.rs +++ b/pingora-core/src/connectors/l4.rs @@ -754,7 +754,7 @@ mod quic_tests { #[tokio::test] async fn test_connector_quic_handshake() -> Result<()> { - let (_server_handle, peer) = quic_listener_peer().await?; + let (_server_handle, peer) = quic_listener_peer(6184).await?; let mut pre_handshake_stream = connect(&peer, None).await?; assert!(pre_handshake_stream.quic_connection_state().is_some()); diff --git a/pingora-core/src/connectors/mod.rs b/pingora-core/src/connectors/mod.rs index 622e30108..364ed4744 100644 --- a/pingora-core/src/connectors/mod.rs +++ b/pingora-core/src/connectors/mod.rs @@ -628,21 +628,20 @@ pub(crate) mod quic_tests { use async_trait::async_trait; use bytes::{BufMut, BytesMut}; use http::{Response, StatusCode}; - use log::info; + 
use log::{error, info}; use pingora_error::Result; use pingora_timeout::timeout; use std::thread; use std::thread::JoinHandle; use std::time::Duration; - pub(crate) async fn quic_listener_peer() -> Result<(JoinHandle<()>, HttpPeer)> { + pub(crate) async fn quic_listener_peer(port: u16) -> Result<(JoinHandle<()>, HttpPeer)> { let _ = env_logger::builder() .is_test(true) .format_timestamp(Some(env_logger::TimestampPrecision::Nanos)) .try_init(); info!("Starting listener..."); - let port = 6147u16; fn inner(port: u16) { let cert_path = format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR")); let key_path = format!("{}/tests/keys/key.pem", env!("CARGO_MANIFEST_DIR")); diff --git a/pingora-core/src/protocols/l4/quic/connector.rs b/pingora-core/src/protocols/l4/quic/connector.rs index 80980be84..45b4cadb7 100644 --- a/pingora-core/src/protocols/l4/quic/connector.rs +++ b/pingora-core/src/protocols/l4/quic/connector.rs @@ -135,7 +135,7 @@ impl ConnectionRx { let mut conn = self.connection.lock(); match conn.recv(&mut buf[..size], recv_info) { Ok(_size) => { - debug!("connection {:?} received {}", conn_id, size); + debug!("connection {:?} received data length={}", conn_id, size); self.rx_notify.notify_waiters(); self.tx_notify.notify_waiters(); } From 537792b95477b467b3ffd02500430fbb455992e5 Mon Sep 17 00:00:00 2001 From: Harald Gutmann Date: Tue, 4 Feb 2025 13:16:22 +0100 Subject: [PATCH 52/52] register rx & tx notify directly after signals --- pingora-core/src/connectors/mod.rs | 2 +- pingora-core/src/protocols/http/v3/client.rs | 23 ++++++++++------- pingora-core/src/protocols/http/v3/mod.rs | 27 ++++++++++++-------- pingora-core/src/protocols/http/v3/server.rs | 23 ++++++++++------- pingora-core/src/protocols/l4/quic/mod.rs | 17 +++++++----- 5 files changed, 56 insertions(+), 36 deletions(-) diff --git a/pingora-core/src/connectors/mod.rs b/pingora-core/src/connectors/mod.rs index 364ed4744..5db875d48 100644 --- a/pingora-core/src/connectors/mod.rs +++ 
b/pingora-core/src/connectors/mod.rs @@ -628,7 +628,7 @@ pub(crate) mod quic_tests { use async_trait::async_trait; use bytes::{BufMut, BytesMut}; use http::{Response, StatusCode}; - use log::{error, info}; + use log::info; use pingora_error::Result; use pingora_timeout::timeout; use std::thread; diff --git a/pingora-core/src/protocols/http/v3/client.rs b/pingora-core/src/protocols/http/v3/client.rs index 34a5bddbf..034b32e38 100644 --- a/pingora-core/src/protocols/http/v3/client.rs +++ b/pingora-core/src/protocols/http/v3/client.rs @@ -460,6 +460,8 @@ pub(crate) struct Http3Poll { impl Http3Poll { pub(crate) async fn start(mut self) -> Result<()> { + let mut notified = self.conn_io.rx_notify.notified(); + 'poll: loop { let poll = { let mut qconn = self.conn_io.quic.lock(); @@ -471,13 +473,22 @@ impl Http3Poll { let (stream_id, ev) = match poll { Ok((stream, ev)) => (stream, ev), Err(e) => { + housekeeping_drop_sessions( + &self.conn_id().clone(), + &mut self.sessions, + &self.drop_sessions, + ); + let conn_alive = self .conn_io - .error_or_timeout_data_race(e, &self.sessions) + .error_or_timeout_data_race(e, notified, &self.sessions) .await?; match conn_alive { - true => continue 'poll, - false => { + Some(updated_notified) => { + notified = updated_notified; + continue 'poll; + } + None => { self.idle_close.send_replace(true); break 'poll Ok(()); } @@ -507,12 +518,6 @@ impl Http3Poll { .send(ev) .await .explain_err(H3Error, |_| "failed to forward h3 event to session")?; - - housekeeping_drop_sessions( - &self.conn_id().clone(), - &mut self.sessions, - &self.drop_sessions, - ); } } diff --git a/pingora-core/src/protocols/http/v3/mod.rs b/pingora-core/src/protocols/http/v3/mod.rs index 6f5cd30ec..704b7dcce 100644 --- a/pingora-core/src/protocols/http/v3/mod.rs +++ b/pingora-core/src/protocols/http/v3/mod.rs @@ -35,6 +35,7 @@ use std::future::Future; use std::pin::Pin; use std::sync::Arc; use std::time::Duration; +use tokio::sync::futures::Notified; use 
tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::Notify; @@ -276,11 +277,9 @@ impl ConnectionIo { async fn error_or_timeout_data_race( &self, error: h3::Error, + data_race: Notified<'_>, sessions: &StreamIdHashMap>, - ) -> Result { - // register before housekeeping to avoid notify misses in high-load scenarios - let data_future = self.rx_notify.notified(); - + ) -> Result> { match error { h3::Error::Done => { debug!("H3 connection {:?} no events available", self.conn_id()); @@ -310,7 +309,7 @@ impl ConnectionIo { qconn.local_error(), qconn.peer_error(), ) { - Ok(()) => Ok(false), // signal connection close + Ok(()) => Ok(None), // signal connection close Err(e) => Err(e), }; } @@ -318,10 +317,14 @@ impl ConnectionIo { } // race for new data on connection or timeout - tokio::select! { /* biased, poll data first */ + let notified = tokio::select! { /* biased, poll data first */ // to avoid timeout race wins in high load scenarios when data could be available biased; - _data = data_future => { /* continue */ } + _data = data_race => { + /* continue */ + // register notify right away to cover all notify signals + self.rx_notify.notified() + } _timedout = async { if let Some(timeout) = timeout { debug!("connection {:?} timeout {:?}", self.conn_id(), timeout); @@ -331,6 +334,9 @@ impl ConnectionIo { tokio::time::sleep(Duration::MAX).await } } => { + // register notify right away to cover all notify signals + let notified = self.rx_notify.notified(); + if !sessions.is_empty() { debug!("connection {:?} timed out with {} open sessions", self.conn_id(), sessions.len()); @@ -342,10 +348,11 @@ impl ConnectionIo { if let Some(timeout) = timeout { trace!("connection {:?} timed out {:?}", self.conn_id(), timeout); - } + }; + notified } - } - Ok(true) // signal continue + }; + Ok(Some(notified)) // signal continue } _ => { // if an error occurs while processing data, the connection is closed with diff --git a/pingora-core/src/protocols/http/v3/server.rs 
b/pingora-core/src/protocols/http/v3/server.rs index b064f440b..6247d37a3 100644 --- a/pingora-core/src/protocols/http/v3/server.rs +++ b/pingora-core/src/protocols/http/v3/server.rs @@ -248,6 +248,8 @@ impl Http3Session { conn: &mut Http3Connection, digest: Arc, ) -> Result> { + let mut notified = conn.conn_io.rx_notify.notified(); + 'poll: loop { let poll = { let mut qconn = conn.conn_io.quic.lock(); @@ -352,22 +354,25 @@ impl Http3Session { } } Err(e) => { + housekeeping_drop_sessions( + &conn.conn_id().clone(), + &mut conn.sessions, + &conn.drop_sessions, + ); + let conn_active = conn .conn_io - .error_or_timeout_data_race(e, &conn.sessions) + .error_or_timeout_data_race(e, notified, &conn.sessions) .await?; match conn_active { - true => continue 'poll, - false => return Ok(None), + Some(updated_notified) => { + notified = updated_notified; + continue 'poll; + } + None => return Ok(None), } } } - - housekeeping_drop_sessions( - &conn.conn_id().clone(), - &mut conn.sessions, - &conn.drop_sessions, - ); } } diff --git a/pingora-core/src/protocols/l4/quic/mod.rs b/pingora-core/src/protocols/l4/quic/mod.rs index dfbd64daa..5fae1878b 100644 --- a/pingora-core/src/protocols/l4/quic/mod.rs +++ b/pingora-core/src/protocols/l4/quic/mod.rs @@ -141,10 +141,11 @@ impl ConnectionTx { let id = self.connection_id; let mut out = [0u8; MAX_IPV6_BUF_SIZE]; - let mut finished_sending = None; + let mut notified = self.tx_notify.notified(); debug!("connection {:?} tx write", id); 'write: loop { let mut continue_write = false; + let mut finished_sending = false; // update tx stats & get current details let (max_dgram_size, max_send_burst) = self.tx_stats.max_send_burst(&self.connection); @@ -178,8 +179,7 @@ impl ConnectionTx { Err(e) => { if e == quiche::Error::Done { trace!("connection {:?} send finished", id); - // register notify before socket send to avoid misses under high load - finished_sending = Some(self.tx_notify.notified()); + finished_sending = true; break 'fill; } 
error!("connection {:?} send error: {:?}", id, e); @@ -206,7 +206,9 @@ impl ConnectionTx { if total_write == 0 || dst_info.is_none() { trace!("connection {:?} nothing to send", id); - self.tx_notify.notified().await; + notified.await; + // register notify right away to cover all notify signals + notified = self.tx_notify.notified(); continue 'write; } let dst_info = dst_info.unwrap(); @@ -242,10 +244,11 @@ impl ConnectionTx { continue 'write; } - if let Some(tx_notified) = finished_sending { + if finished_sending { trace!("connection {:?} finished sending", id); - tx_notified.await; - finished_sending = None; + notified.await; + // register notify right away to cover all notify signals + notified = self.tx_notify.notified(); continue 'write; } }