Skip to content

Commit

Permalink
Add ClickHouse native TCP ports to internal DNS (#6642)
Browse files Browse the repository at this point in the history
- Add a new DNS service for the ClickHouse Native TCP protocol,
`ServiceName::ClickhouseNative`.
- Add a helper method to the DnsConfigBuilder for creating expected
ClickHouse replica records in internal DNS. This is used to ensure we
correctly make the records for _both_ the HTTP server and Native TCP
server, pointing to the same Omicron zone in the blueprint.
- Use this method in the DNS generation for a blueprint, and add the
native service to the tests to check that we include the SRV record for
it.
- Fixes #6598
  • Loading branch information
bnaecker authored Sep 25, 2024
1 parent f21d79d commit f40147b
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 38 deletions.
6 changes: 3 additions & 3 deletions dev-tools/reconfigurator-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -841,12 +841,12 @@ fn cmd_blueprint_diff(
&blueprint1,
&sleds_by_id,
&Default::default(),
);
)?;
let internal_dns_config2 = blueprint_internal_dns_config(
&blueprint2,
&sleds_by_id,
&Default::default(),
);
)?;
let dns_diff = DnsDiff::new(&internal_dns_config1, &internal_dns_config2)
.context("failed to assemble DNS diff")?;
swriteln!(rv, "internal DNS:\n{}", dns_diff);
Expand Down Expand Up @@ -920,7 +920,7 @@ fn cmd_blueprint_diff_dns(
blueprint,
&sleds_by_id,
&Default::default(),
)
)?
}
CliDnsGroup::External => blueprint_external_dns_config(
blueprint,
Expand Down
40 changes: 40 additions & 0 deletions internal-dns/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ use crate::names::{ServiceName, BOUNDARY_NTP_DNS_NAME, DNS_ZONE};
use anyhow::{anyhow, ensure};
use core::fmt;
use dns_service_client::types::{DnsConfigParams, DnsConfigZone, DnsRecord};
use omicron_common::address::CLICKHOUSE_TCP_PORT;
use omicron_common::api::external::Generation;
use omicron_uuid_kinds::{OmicronZoneUuid, SledUuid};
use std::collections::BTreeMap;
Expand Down Expand Up @@ -398,6 +399,45 @@ impl DnsConfigBuilder {
self.service_backend_zone(ServiceName::Mgd, &zone, mgd_port)
}

/// Add a ClickHouse zone together with records for each of its services.
///
/// A ClickHouse server exposes more than one interface on the network: a
/// plain HTTP API, plus a lower-level TCP-based protocol called the
/// "Native protocol". This shorthand inserts the zone once and then
/// creates backend records for both services, so that the two SRV records
/// always resolve to the same Omicron zone.
///
/// `http_service` names the HTTP service that lives in this zone and
/// `http_port` is the port that service listens on. The native-protocol
/// record is added automatically, using its default port.
///
/// # Errors
///
/// Fails if `http_service` does not refer to a ClickHouse replica server,
/// or if this zone has already been added to the configuration.
pub fn host_zone_clickhouse(
    &mut self,
    zone_id: OmicronZoneUuid,
    underlay_address: Ipv6Addr,
    http_service: ServiceName,
    http_port: u16,
) -> anyhow::Result<()> {
    anyhow::ensure!(
        matches!(
            http_service,
            ServiceName::Clickhouse | ServiceName::ClickhouseServer
        ),
        "This method is only valid for ClickHouse replica servers, \
        but we were provided the service '{http_service:?}'",
    );
    let clickhouse_zone = self.host_zone(zone_id, underlay_address)?;
    self.service_backend_zone(http_service, &clickhouse_zone, http_port)?;
    self.service_backend_zone(
        ServiceName::ClickhouseNative,
        &clickhouse_zone,
        CLICKHOUSE_TCP_PORT,
    )
}

/// Construct a `DnsConfigZone` describing the control plane zone described
/// up to this point
pub fn build_zone(self) -> DnsConfigZone {
Expand Down
9 changes: 9 additions & 0 deletions internal-dns/src/names.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,15 @@ pub const DNS_ZONE_EXTERNAL_TESTING: &str = "oxide-dev.test";
/// Names of services within the control plane
#[derive(Clone, Copy, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)]
pub enum ServiceName {
/// The HTTP interface to a single-node ClickHouse server.
Clickhouse,
/// The native TCP interface to a ClickHouse server.
///
/// NOTE: This is used for either single-node or a replicated cluster.
ClickhouseNative,
/// The TCP interface to a ClickHouse Keeper server.
ClickhouseKeeper,
/// The HTTP interface to a replicated ClickHouse server.
ClickhouseServer,
Cockroach,
InternalDns,
Expand All @@ -48,6 +55,7 @@ impl ServiceName {
fn service_kind(&self) -> &'static str {
match self {
ServiceName::Clickhouse => "clickhouse",
ServiceName::ClickhouseNative => "clickhouse-native",
ServiceName::ClickhouseKeeper => "clickhouse-keeper",
ServiceName::ClickhouseServer => "clickhouse-server",
ServiceName::Cockroach => "cockroach",
Expand All @@ -74,6 +82,7 @@ impl ServiceName {
pub fn dns_name(&self) -> String {
match self {
ServiceName::Clickhouse
| ServiceName::ClickhouseNative
| ServiceName::ClickhouseKeeper
| ServiceName::ClickhouseServer
| ServiceName::Cockroach
Expand Down
59 changes: 43 additions & 16 deletions nexus/reconfigurator/execution/src/dns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ pub(crate) async fn deploy_dns(

// Next, construct the DNS config represented by the blueprint.
let internal_dns_zone_blueprint =
blueprint_internal_dns_config(blueprint, sleds_by_id, overrides);
blueprint_internal_dns_config(blueprint, sleds_by_id, overrides)?;
let silos = datastore
.silo_list_all_batched(opctx, Discoverability::All)
.await
Expand Down Expand Up @@ -250,7 +250,7 @@ pub fn blueprint_internal_dns_config(
blueprint: &Blueprint,
sleds_by_id: &BTreeMap<SledUuid, Sled>,
overrides: &Overridables,
) -> DnsConfigZone {
) -> Result<DnsConfigZone, Error> {
// The DNS names configured here should match what RSS configures for the
// same zones. It's tricky to have RSS share the same code because it uses
// Sled Agent's _internal_ `OmicronZoneConfig` (and friends), whereas we're
Expand All @@ -259,7 +259,7 @@ pub fn blueprint_internal_dns_config(
// the details.
let mut dns_builder = DnsConfigBuilder::new();

for (_, zone) in
'all_zones: for (_, zone) in
blueprint.all_omicron_zones(BlueprintZoneFilter::ShouldBeInInternalDns)
{
let (service_name, port) = match &zone.zone_type {
Expand All @@ -271,13 +271,37 @@ pub fn blueprint_internal_dns_config(
) => (ServiceName::InternalNtp, address.port()),
BlueprintZoneType::Clickhouse(
blueprint_zone_type::Clickhouse { address, .. },
) => (ServiceName::Clickhouse, address.port()),
)
| BlueprintZoneType::ClickhouseServer(
blueprint_zone_type::ClickhouseServer { address, .. },
) => {
// Add the HTTP and native TCP interfaces for ClickHouse data
// replicas. This adds the zone itself, so we need to continue
// back up to the loop over all the Omicron zones, rather than
// falling through to call `host_zone_with_one_backend()`.
let http_service = if matches!(
&zone.zone_type,
BlueprintZoneType::Clickhouse(_)
) {
ServiceName::Clickhouse
} else {
ServiceName::ClickhouseServer
};
dns_builder
.host_zone_clickhouse(
zone.id,
zone.underlay_address,
http_service,
address.port(),
)
.map_err(|e| Error::InternalError {
internal_message: e.to_string(),
})?;
continue 'all_zones;
}
BlueprintZoneType::ClickhouseKeeper(
blueprint_zone_type::ClickhouseKeeper { address, .. },
) => (ServiceName::ClickhouseKeeper, address.port()),
BlueprintZoneType::ClickhouseServer(
blueprint_zone_type::ClickhouseServer { address, .. },
) => (ServiceName::ClickhouseServer, address.port()),
BlueprintZoneType::CockroachDb(
blueprint_zone_type::CockroachDb { address, .. },
) => (ServiceName::Cockroach, address.port()),
Expand All @@ -302,24 +326,22 @@ pub fn blueprint_internal_dns_config(
blueprint_zone_type::InternalDns { http_address, .. },
) => (ServiceName::InternalDns, http_address.port()),
};

// This unwrap is safe because this function only fails if we provide
// the same zone id twice, which should not be possible here.
dns_builder
.host_zone_with_one_backend(
zone.id,
zone.underlay_address,
service_name,
port,
)
.unwrap();
.map_err(|e| Error::InternalError {
internal_message: e.to_string(),
})?;
}

let scrimlets = sleds_by_id.values().filter(|sled| sled.is_scrimlet);
for scrimlet in scrimlets {
let sled_subnet = scrimlet.subnet();
let switch_zone_ip = overrides.switch_zone_ip(scrimlet.id, sled_subnet);
// unwrap(): see above.
dns_builder
.host_zone_switch(
scrimlet.id,
Expand All @@ -328,10 +350,12 @@ pub fn blueprint_internal_dns_config(
overrides.mgs_port(scrimlet.id),
overrides.mgd_port(scrimlet.id),
)
.unwrap();
.map_err(|e| Error::InternalError {
internal_message: e.to_string(),
})?;
}

dns_builder.build_zone()
Ok(dns_builder.build_zone())
}

pub fn blueprint_external_dns_config(
Expand Down Expand Up @@ -763,7 +787,8 @@ mod test {
&blueprint,
&BTreeMap::new(),
&Default::default(),
);
)
.unwrap();
assert!(blueprint_dns.records.is_empty());
}

Expand Down Expand Up @@ -887,7 +912,8 @@ mod test {
&blueprint,
&sleds_by_id,
&Default::default(),
);
)
.unwrap();
assert_eq!(blueprint_dns_zone.zone_name, DNS_ZONE);

// Now, verify a few different properties about the generated DNS
Expand Down Expand Up @@ -1054,6 +1080,7 @@ mod test {
// Tfport).
let mut srv_kinds_expected = BTreeSet::from([
ServiceName::Clickhouse,
ServiceName::ClickhouseNative,
ServiceName::Cockroach,
ServiceName::InternalDns,
ServiceName::ExternalDns,
Expand Down
44 changes: 35 additions & 9 deletions nexus/test-utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,32 @@ impl RackInitRequestBuilder {
.service_backend_zone(service_name, &zone, address.port())
.expect("Failed to set up DNS for {kind}");
}

// Register a ClickHouse dataset. ClickHouse is special-cased because its
// single zone carries multiple SRV records (HTTP and native TCP).
fn add_clickhouse_dataset(
    &mut self,
    zpool_id: ZpoolUuid,
    dataset_id: Uuid,
    address: SocketAddrV6,
) {
    // Record the dataset itself on the given zpool.
    let request = DatasetPutRequest {
        address,
        kind: DatasetKind::Clickhouse,
    };
    self.datasets.push(DatasetCreateRequest {
        zpool_id: zpool_id.into_untyped_uuid(),
        dataset_id,
        request,
    });
    // Then set up DNS for both ClickHouse services in the zone; the zone ID
    // is derived from the dataset ID.
    let zone_id = OmicronZoneUuid::from_untyped_uuid(dataset_id);
    self.internal_dns_config
        .host_zone_clickhouse(
            zone_id,
            *address.ip(),
            internal_dns::ServiceName::Clickhouse,
            address.port(),
        )
        .expect("Failed to setup ClickHouse DNS");
}
}

pub struct ControlPlaneTestContextBuilder<'a, N: NexusServer> {
Expand Down Expand Up @@ -453,29 +479,29 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
)
.await
.unwrap();
let port = clickhouse.http_address().port();

let zpool_id = ZpoolUuid::new_v4();
let dataset_id = Uuid::new_v4();
let address = SocketAddrV6::new(Ipv6Addr::LOCALHOST, port, 0, 0);
self.rack_init_builder.add_dataset(
let http_address = clickhouse.http_address();
let http_port = http_address.port();
self.rack_init_builder.add_clickhouse_dataset(
zpool_id,
dataset_id,
address,
DatasetKind::Clickhouse,
internal_dns::ServiceName::Clickhouse,
http_address,
);
self.clickhouse = Some(clickhouse);

// NOTE: We could pass this port information via DNS, rather than
// requiring it to be known before Nexus starts.
//
// See https://github.com/oxidecomputer/omicron/issues/6407.
self.config
.pkg
.timeseries_db
.address
.as_mut()
.expect("Tests expect to set a port of Clickhouse")
.set_port(port);
.set_port(http_port);

let pool_name = illumos_utils::zpool::ZpoolName::new_external(zpool_id)
.to_string()
Expand All @@ -484,11 +510,11 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
self.blueprint_zones.push(BlueprintZoneConfig {
disposition: BlueprintZoneDisposition::InService,
id: OmicronZoneUuid::from_untyped_uuid(dataset_id),
underlay_address: *address.ip(),
underlay_address: *http_address.ip(),
filesystem_pool: Some(ZpoolName::new_external(zpool_id)),
zone_type: BlueprintZoneType::Clickhouse(
blueprint_zone_type::Clickhouse {
address,
address: http_address,
dataset: OmicronZoneDataset { pool_name },
},
),
Expand Down
20 changes: 10 additions & 10 deletions sled-agent/src/rack_setup/plan/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -708,14 +708,14 @@ impl Plan {
};
let id = OmicronZoneUuid::new_v4();
let ip = sled.addr_alloc.next().expect("Not enough addrs");
let port = omicron_common::address::CLICKHOUSE_HTTP_PORT;
let address = SocketAddrV6::new(ip, port, 0, 0);
let http_port = omicron_common::address::CLICKHOUSE_HTTP_PORT;
let http_address = SocketAddrV6::new(ip, http_port, 0, 0);
dns_builder
.host_zone_with_one_backend(
.host_zone_clickhouse(
id,
ip,
ServiceName::Clickhouse,
port,
http_port,
)
.unwrap();
let dataset_name =
Expand All @@ -727,7 +727,7 @@ impl Plan {
underlay_address: ip,
zone_type: BlueprintZoneType::Clickhouse(
blueprint_zone_type::Clickhouse {
address,
address: http_address,
dataset: OmicronZoneDataset {
pool_name: dataset_name.pool().clone(),
},
Expand All @@ -751,14 +751,14 @@ impl Plan {
let ip = sled.addr_alloc.next().expect("Not enough addrs");
// TODO: This may need to be a different port if/when to have single node
// and replicated running side by side as per stage 1 of RFD 468.
let port = omicron_common::address::CLICKHOUSE_HTTP_PORT;
let address = SocketAddrV6::new(ip, port, 0, 0);
let http_port = omicron_common::address::CLICKHOUSE_HTTP_PORT;
let http_address = SocketAddrV6::new(ip, http_port, 0, 0);
dns_builder
.host_zone_with_one_backend(
.host_zone_clickhouse(
id,
ip,
ServiceName::ClickhouseServer,
port,
http_port,
)
.unwrap();
let dataset_name =
Expand All @@ -770,7 +770,7 @@ impl Plan {
underlay_address: ip,
zone_type: BlueprintZoneType::ClickhouseServer(
blueprint_zone_type::ClickhouseServer {
address,
address: http_address,
dataset: OmicronZoneDataset {
pool_name: dataset_name.pool().clone(),
},
Expand Down

0 comments on commit f40147b

Please sign in to comment.