From e42f3d1fa09e9fe1cb47f57a915a06b41e7bbe5f Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Tue, 24 Dec 2024 08:36:15 -0800
Subject: [PATCH 01/48] initial libcuml wheel

---
 BUILD.md                           |   1 -
 ci/build_wheel.sh                  |  47 ++-----------
 ci/build_wheel_cuml.sh             |  57 +++++++++++++++
 ci/build_wheel_libcuml.sh          |  63 +++++++++++++++++
 ci/test_wheel.sh                   |   7 +-
 ci/validate_wheel.sh               |   2 +
 cpp/README.md                      |   1 -
 dependencies.yaml                  | 104 ++++++++++++++++++++++++++-
 python/cuml/CMakeLists.txt         |  98 +++++++-------------------
 python/cuml/cuml/__init__.py       |  12 +++-
 python/cuml/pyproject.toml         |   6 ++
 python/libcuml/CMakeLists.txt      | 109 +++++++++++++++++++++++++++++
 python/libcuml/LICENSE             |   1 +
 python/libcuml/README.md           |   1 +
 python/libcuml/libcuml/VERSION     |   1 +
 python/libcuml/libcuml/__init__.py |  16 +++++
 python/libcuml/libcuml/_version.py |  30 ++++++++
 python/libcuml/libcuml/load.py     |  84 ++++++++++++++++++++++
 python/libcuml/pyproject.toml      |  90 ++++++++++++++++++++++++
 19 files changed, 611 insertions(+), 119 deletions(-)
 create mode 100755 ci/build_wheel_cuml.sh
 create mode 100755 ci/build_wheel_libcuml.sh
 create mode 100644 python/libcuml/CMakeLists.txt
 create mode 120000 python/libcuml/LICENSE
 create mode 120000 python/libcuml/README.md
 create mode 120000 python/libcuml/libcuml/VERSION
 create mode 100644 python/libcuml/libcuml/__init__.py
 create mode 100644 python/libcuml/libcuml/_version.py
 create mode 100644 python/libcuml/libcuml/load.py
 create mode 100644 python/libcuml/pyproject.toml

diff --git a/BUILD.md b/BUILD.md
index 059836e57d..e67f674372 100644
--- a/BUILD.md
+++ b/BUILD.md
@@ -234,7 +234,6 @@ cuML's cmake has the following configurable flags available:
 | BUILD_PRIMS_TESTS | [ON, OFF]  | ON  | Enable/disable building cuML algorithm test executable `prims_test`.  |
 | BUILD_CUML_EXAMPLES | [ON, OFF]  | ON  | Enable/disable building cuML C++ API usage examples.  |
 | BUILD_CUML_BENCH | [ON, OFF] | ON | Enable/disable building of cuML C++ benchark.  |
-| BUILD_CUML_PRIMS_BENCH | [ON, OFF] | ON | Enable/disable building of ml-prims C++ benchark.  |
 | CMAKE_CXX11_ABI | [ON, OFF]  | ON  | Enable/disable the GLIBCXX11 ABI  |
 | DETECT_CONDA_ENV | [ON, OFF] | ON | Use detection of conda environment for dependencies. If set to ON, and no value for CMAKE_INSTALL_PREFIX is passed, then it'll assign it to $CONDA_PREFIX (to install in the active environment).  |
 | DISABLE_OPENMP | [ON, OFF]  | OFF  | Set to `ON` to disable OpenMP  |
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 4a7253ca92..78b8a8a08c 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -3,57 +3,24 @@
 
 set -euo pipefail
 
-package_dir="python/cuml"
+package_name=$1
+package_dir=$2
 
 source rapids-configure-sccache
 source rapids-date-string
 
-RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-
-# This is the version of the suffix with a preceding hyphen. It's used
-# everywhere except in the final wheel name.
-PACKAGE_CUDA_SUFFIX="-${RAPIDS_PY_CUDA_SUFFIX}"
-
 rapids-generate-version > ./VERSION
 
-cd ${package_dir}
-
-case "${RAPIDS_CUDA_VERSION}" in
-  12.*)
-    EXCLUDE_ARGS=(
-      --exclude "libcuvs.so"
-      --exclude "libcublas.so.12"
-      --exclude "libcublasLt.so.12"
-      --exclude "libcufft.so.11"
-      --exclude "libcurand.so.10"
-      --exclude "libcusolver.so.11"
-      --exclude "libcusparse.so.12"
-      --exclude "libnvJitLink.so.12"
-    )
-    EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON"
-    ;;
-  11.*)
-    EXCLUDE_ARGS=(
-      --exclude "libcuvs.so"
-    )
-    EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF"
-    ;;
-esac
+cd "${package_dir}"
 
 sccache --zero-stats
 
-SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS}" \
-  python -m pip wheel . \
+rapids-logger "Building '${package_name}' wheel"
+python -m pip wheel \
     -w dist \
     -v \
     --no-deps \
-    --disable-pip-version-check
+    --disable-pip-version-check \
+    .
 
 sccache --show-adv-stats
-
-mkdir -p final_dist
-python -m auditwheel repair -w final_dist "${EXCLUDE_ARGS[@]}" dist/*
-
-../../ci/validate_wheel.sh final_dist
-
-RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python final_dist
diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
new file mode 100755
index 0000000000..c6375bee54
--- /dev/null
+++ b/ci/build_wheel_cuml.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+package_dir="python/cuml"
+
+source rapids-configure-sccache
+source rapids-date-string
+
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
+
+rapids-generate-version > ./VERSION
+
+cd ${package_dir}
+
+# TODO(jameslamb): split this out into build_wheel_{cuml,libcuml}.sh
+# TODO(jameslamb): add libcuml++.so to cuml exclusions
+case "${RAPIDS_CUDA_VERSION}" in
+  12.*)
+    EXCLUDE_ARGS=(
+      --exclude "libcuvs.so"
+      --exclude "libcublas.so.12"
+      --exclude "libcublasLt.so.12"
+      --exclude "libcufft.so.11"
+      --exclude "libcurand.so.10"
+      --exclude "libcusolver.so.11"
+      --exclude "libcusparse.so.12"
+      --exclude "libnvJitLink.so.12"
+    )
+    EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON"
+    ;;
+  11.*)
+    EXCLUDE_ARGS=(
+      --exclude "libcuvs.so"
+    )
+    EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF"
+    ;;
+esac
+
+sccache --zero-stats
+
+SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS}" \
+  python -m pip wheel . \
+    -w dist \
+    -v \
+    --no-deps \
+    --disable-pip-version-check
+
+sccache --show-adv-stats
+
+mkdir -p final_dist
+python -m auditwheel repair -w final_dist "${EXCLUDE_ARGS[@]}" dist/*
+
+../../ci/validate_wheel.sh final_dist
+
+RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python final_dist
diff --git a/ci/build_wheel_libcuml.sh b/ci/build_wheel_libcuml.sh
new file mode 100755
index 0000000000..e563aed285
--- /dev/null
+++ b/ci/build_wheel_libcuml.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+package_name="libcuml"
+package_dir="python/libcuml"
+
+rapids-logger "Generating build requirements"
+
+rapids-dependency-file-generator \
+  --output requirements \
+  --file-key "py_build_${package_name}" \
+  --file-key "py_rapids_build_${package_name}" \
+  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};cuda_suffixed=true" \
+| tee /tmp/requirements-build.txt
+
+rapids-logger "Installing build requirements"
+python -m pip install \
+    -v \
+    --prefer-binary \
+    -r /tmp/requirements-build.txt
+
+# build with '--no-build-isolation', for better sccache hit rate
+# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735)
+export PIP_NO_BUILD_ISOLATION=0
+
+case "${RAPIDS_CUDA_VERSION}" in
+  12.*)
+    EXCLUDE_ARGS=(
+      --exclude "libcuvs.so"
+      --exclude "libcublas.so.12"
+      --exclude "libcublasLt.so.12"
+      --exclude "libcufft.so.11"
+      --exclude "libcurand.so.10"
+      --exclude "libcusolver.so.11"
+      --exclude "libcusparse.so.12"
+      --exclude "libnvJitLink.so.12"
+    )
+    EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON"
+    ;;
+  11.*)
+    EXCLUDE_ARGS=(
+      --exclude "libcuvs.so"
+    )
+    EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF"
+    ;;
+esac
+
+export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS}"
+./ci/build_wheel.sh "${package_name}" "${package_dir}"
+
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
+
+mkdir -p "${package_dir}/final_dist"
+python -m auditwheel repair \
+    "${EXCLUDE_ARGS[@]}" \
+    -w "${package_dir}/final_dist" "${package_dir}"/dist/*
+
+# validate_wheel.sh takes the wheel directory as $1; run it from the package dir like build_wheel_cuml.sh does
+(cd "${package_dir}" && ../../ci/validate_wheel.sh final_dist)
+
+RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist"
diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh
index 86eef035cd..1eec0efac6 100755
--- a/ci/test_wheel.sh
+++ b/ci/test_wheel.sh
@@ -5,7 +5,8 @@ set -euo pipefail
 
 mkdir -p ./dist
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
+RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist
+RAPIDS_PY_WHEEL_NAME="libcuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
 
 # On arm also need to install CMake because treelite needs to be compiled (no wheels available for arm).
 if [[ "$(arch)" == "aarch64" ]]; then
@@ -13,7 +14,9 @@ if [[ "$(arch)" == "aarch64" ]]; then
 fi
 
 # echo to expand wildcard before adding `[extra]` requires for pip
-python -m pip install $(echo ./dist/cuml*.whl)[test]
+python -m pip install \
+  ./dist/libcuml*.whl \
+  "$(echo ./dist/cuml*.whl)[test]"
 
 RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
 mkdir -p "${RAPIDS_TESTS_DIR}"
diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh
index 60a80fce6b..1bd68ab7a1 100755
--- a/ci/validate_wheel.sh
+++ b/ci/validate_wheel.sh
@@ -7,6 +7,8 @@ wheel_dir_relative_path=$1
 
 rapids-logger "validate packages with 'pydistcheck'"
 
+# TODO(jameslamb) add libcuml here
+
 pydistcheck \
     --inspect \
     "$(echo ${wheel_dir_relative_path}/*.whl)"
diff --git a/cpp/README.md b/cpp/README.md
index fc85d23288..10ffa01617 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -37,7 +37,6 @@ Current cmake offers the following configuration options:
 | BUILD_PRIMS_TESTS | [ON, OFF]  | ON  | Enable/disable building cuML algorithm test executable `prims_test`.  |
 | BUILD_CUML_EXAMPLES | [ON, OFF]  | ON  | Enable/disable building cuML C++ API usage examples.  |
 | BUILD_CUML_BENCH | [ON, OFF]  | ON  | Enable/disable building of cuML C++ benchark. |
-| BUILD_CUML_PRIMS_BENCH | [ON, OFF]  | ON  | Enable/disable building of ml-prims C++ benchark. |
 | BUILD_CUML_STD_COMMS | [ON, OFF] | ON | Enable/disable building cuML NCCL+UCX communicator for running multi-node multi-GPU algorithms. Note that UCX support can also be enabled/disabled (see below). The standard communicator and MPI communicator are not mutually exclusive and can both be installed at the same time. |
 | WITH_UCX | [ON, OFF] | OFF | Enable/disable UCX support in the standard cuML communicator. Algorithms requiring point-to-point messaging will not work when this is disabled. This flag is ignored if BUILD_CUML_STD_COMMS is set to OFF. |
 | BUILD_CUML_MPI_COMMS | [ON, OFF] | OFF | Enable/disable building cuML MPI+NCCL communicator for running multi-node multi-GPU C++ tests. MPI communicator and STD communicator may both be installed at the same time. If OFF, it overrides BUILD_CUML_MG_TESTS to be OFF as well. |
diff --git a/dependencies.yaml b/dependencies.yaml
index 773afce021..d65e45e586 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -9,6 +9,7 @@ files:
       - common_build
       - cuda
       - cuda_version
+      - depends_on_librmm
       - docs
       - py_build
       - py_run
@@ -81,16 +82,18 @@ files:
       key: requires
     includes:
       - common_build
+      - depends_on_librmm
       - py_build
-  py_run:
+  py_run_cuml:
     output: pyproject
     pyproject_dir: python/cuml
     extras:
       table: project
     includes:
       - cuda_wheels
+      - depends_on_libcuml
       - py_run
-  py_test:
+  py_test_cuml:
     output: pyproject
     pyproject_dir: python/cuml
     extras:
@@ -98,6 +101,31 @@ files:
       key: test
     includes:
       - test_python
+  py_build_libcuml:
+    output: pyproject
+    pyproject_dir: python/libcuml
+    extras:
+      table: build-system
+    includes:
+      - rapids_build_backend
+  py_rapids_build_libcuml:
+    output: pyproject
+    pyproject_dir: python/libcuml
+    extras:
+      table: tool.rapids-build-backend
+      key: requires
+    includes:
+      - common_build
+      - depends_on_libraft
+      - depends_on_librmm
+      - py_build
+  py_run_libcuml:
+    output: pyproject
+    pyproject_dir: python/libcuml
+    extras:
+      table: project
+    includes:
+      - cuda_wheels
 channels:
   - rapidsai
   - rapidsai-nightly
@@ -540,3 +568,75 @@ dependencies:
           - *scikit_learn
           - seaborn
           - *xgboost
+  depends_on_libcuml:
+    common:
+      - output_types: conda
+        packages:
+          - &libcuml_unsuffixed libcuml==25.2.*,>=0.0.0a0
+      - output_types: requirements
+        packages:
+          # pip recognizes the index as a global option for the requirements.txt file
+          - --extra-index-url=https://pypi.nvidia.com
+          - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple
+    specific:
+      - output_types: [requirements, pyproject]
+        matrices:
+          - matrix:
+              cuda: "12.*"
+              cuda_suffixed: "true"
+            packages:
+              - libcuml-cu12==25.2.*,>=0.0.0a0
+          - matrix:
+              cuda: "11.*"
+              cuda_suffixed: "true"
+            packages:
+              - libcuml-cu11==25.2.*,>=0.0.0a0
+          - {matrix: null, packages: [*libcuml_unsuffixed]}
+  depends_on_libraft:
+    common:
+      - output_types: conda
+        packages:
+          - &libraft_unsuffixed libraft==25.2.*,>=0.0.0a0
+      - output_types: requirements
+        packages:
+          # pip recognizes the index as a global option for the requirements.txt file
+          - --extra-index-url=https://pypi.nvidia.com
+          - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple
+    specific:
+      - output_types: [requirements, pyproject]
+        matrices:
+          - matrix:
+              cuda: "12.*"
+              cuda_suffixed: "true"
+            packages:
+              - libraft-cu12==25.2.*,>=0.0.0a0
+          - matrix:
+              cuda: "11.*"
+              cuda_suffixed: "true"
+            packages:
+              - libraft-cu11==25.2.*,>=0.0.0a0
+          - {matrix: null, packages: [*libraft_unsuffixed]}
+  depends_on_librmm:
+    common:
+      - output_types: conda
+        packages:
+          - &librmm_unsuffixed librmm==25.2.*,>=0.0.0a0
+      - output_types: requirements
+        packages:
+          # pip recognizes the index as a global option for the requirements.txt file
+          - --extra-index-url=https://pypi.nvidia.com
+          - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple
+    specific:
+      - output_types: [requirements, pyproject]
+        matrices:
+          - matrix:
+              cuda: "12.*"
+              cuda_suffixed: "true"
+            packages:
+              - librmm-cu12==25.2.*,>=0.0.0a0
+          - matrix:
+              cuda: "11.*"
+              cuda_suffixed: "true"
+            packages:
+              - librmm-cu11==25.2.*,>=0.0.0a0
+          - {matrix: null, packages: [*librmm_unsuffixed]}
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 221b5ebf75..34fabd2d9d 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -42,7 +42,6 @@ option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libr
 option(USE_CUVS_WHEEL "Use the cuVS wheel" OFF)
 set(CUML_RAFT_CLONE_ON_PIN OFF)
 
-
 # todo: use CMAKE_MESSAGE_CONTEXT for prefix for logging.
 # https://github.com/rapidsai/cuml/issues/4843
 message(VERBOSE "CUML_PY: Build only cuML CPU Python components.: ${CUML_CPU}")
@@ -57,79 +56,38 @@ set(CUML_CPP_SRC "../../cpp")
 ################################################################################
 # - Process User Options  ------------------------------------------------------
 
-# If the user requested it, we attempt to find cuml.
-if(FIND_CUML_CPP)
-  # We need to call get_treelite explicitly because we need the correct
-  # ${TREELITE_LIBS} definition for RF
-  include(rapids-cpm)
-  include(rapids-export)
-  rapids_cpm_init()
-  find_package(cuml ${CUML_VERSION} REQUIRED)
-  include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
-else()
-  set(cuml_FOUND OFF)
-endif()
+# We need to call get_treelite explicitly because we need the correct
+# ${TREELITE_LIBS} definition for RF
+#
+# and it needs to come before find_package(cuml), because it's a PUBLIC
+# dependency of cuml::cuml
+include(rapids-cpm)
+include(rapids-export)
+rapids_cpm_init()
+include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
+
+find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
+
+# # If the user requested it, we attempt to find cuml.
+# if(FIND_CUML_CPP)
+#   # We need to call get_treelite explicitly because we need the correct
+#   # ${TREELITE_LIBS} definition for RF
+#   include(rapids-cpm)
+#   include(rapids-export)
+#   rapids_cpm_init()
+#   find_package(cuml ${CUML_VERSION} REQUIRED)
+#   include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
+# else()
+#   set(cuml_FOUND OFF)
+# endif()
 
 include(rapids-cython-core)
 
 set(CUML_PYTHON_TREELITE_TARGET treelite::treelite)
 
+# cuml-cpu does not need libcuml++.so
 if(NOT CUML_CPU)
-  if(NOT cuml_FOUND)
-    find_package(CUDAToolkit REQUIRED)
-
-    set(BUILD_CUML_TESTS OFF)
-    set(BUILD_PRIMS_TESTS OFF)
-    set(BUILD_CUML_C_LIBRARY OFF)
-    set(BUILD_CUML_EXAMPLES OFF)
-    set(BUILD_CUML_BENCH OFF)
-    set(BUILD_CUML_PRIMS_BENCH OFF)
-    set(CUML_EXPORT_TREELITE_LINKAGE ON)
-    set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
-
-    # Statically link dependencies if building wheels
-    set(CUDA_STATIC_RUNTIME ON)
-    set(CUML_USE_CUVS_STATIC ON)
-    set(CUML_USE_FAISS_STATIC ON)
-    set(CUML_USE_TREELITE_STATIC ON)
-    set(CUML_USE_CUMLPRIMS_MG_STATIC ON)
-    # Link to the CUDA wheels with shared libraries for CUDA 12+
-    if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0)
-      set(CUDA_STATIC_MATH_LIBRARIES OFF)
-    else()
-      if(USE_CUDA_MATH_WHEELS)
-        message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0")
-      endif()
-      set(CUDA_STATIC_MATH_LIBRARIES ON)
-    endif()
-    # Don't install the static libs into wheels
-    set(CUML_EXCLUDE_RAFT_FROM_ALL ON)
-    set(RAFT_EXCLUDE_FAISS_FROM_ALL ON)
-    set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
-    set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL ON)
-
-    add_subdirectory(${CUML_CPP_SRC} cuml-cpp EXCLUDE_FROM_ALL)
-
-    if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS)
-      set(rpaths
-        "$ORIGIN/../nvidia/cublas/lib"
-        "$ORIGIN/../nvidia/cufft/lib"
-        "$ORIGIN/../nvidia/curand/lib"
-        "$ORIGIN/../nvidia/cusolver/lib"
-        "$ORIGIN/../nvidia/cusparse/lib"
-        "$ORIGIN/../nvidia/nvjitlink/lib"
-      )
-      set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH ${rpaths} APPEND)
-    endif()
-
-    if(USE_CUVS_WHEEL)
-      set(rpaths "$ORIGIN/../cuvs")
-      set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH ${rpaths} APPEND)
-    endif()
-
-    set(cython_lib_dir cuml)
-    install(TARGETS ${CUML_CPP_TARGET} DESTINATION ${cython_lib_dir})
-  endif()
+  # (moved to libcuml/CMakeLists.txt)
 endif()
 
 if(CUML_CPU)
@@ -209,10 +167,6 @@ add_subdirectory(cuml/tsa)
 
 add_subdirectory(cuml/experimental/linear_model)
 
-if(DEFINED cython_lib_dir)
-  rapids_cython_add_rpath_entries(TARGET cuml PATHS "${cython_lib_dir}")
-endif()
-
 if(USE_CUVS_WHEEL)
   rapids_cython_add_rpath_entries(TARGET cuml PATHS cuvs)
 endif()
diff --git a/python/cuml/cuml/__init__.py b/python/cuml/cuml/__init__.py
index 62ab93c1b4..a536979c1c 100644
--- a/python/cuml/cuml/__init__.py
+++ b/python/cuml/cuml/__init__.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,6 +14,16 @@
 # limitations under the License.
 #
 
+# If libcuml was installed as a wheel, we must request it to load the library symbols.
+# Otherwise, we assume that the library was installed in a system path that ld can find.
+try:
+    import libcuml
+except ModuleNotFoundError:
+    pass
+else:
+    libcuml.load_library()
+    del libcuml
+
 from cuml.internals.base import Base, UniversalBase
 from cuml.internals.available_devices import is_cuda_available
 
diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml
index 2f0521fe6e..a6d7c6c618 100644
--- a/python/cuml/pyproject.toml
+++ b/python/cuml/pyproject.toml
@@ -20,6 +20,7 @@ requires = [
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [tool.pydistcheck]
+# TODO(jameslamb): update this once libcuml is working
 select = [
     "distro-too-large-compressed",
 ]
@@ -96,6 +97,7 @@ dependencies = [
     "dask-cuda==25.2.*,>=0.0.0a0",
     "dask-cudf==25.2.*,>=0.0.0a0",
     "joblib>=0.11",
+    "libcuml==25.2.*,>=0.0.0a0",
     "numba>=0.57",
     "numpy>=1.23,<3.0a0",
     "nvidia-cublas",
@@ -172,12 +174,16 @@ versioneer\.py |
 [tool.rapids-build-backend]
 build-backend = "scikit_build_core.build"
 dependencies-file = "../../dependencies.yaml"
+# TODO(jameslamb): can 'cuda_wheels=true' be removed?
 matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true"
+
+# TODO(jameslamb): check build deps for cuml
 requires = [
     "cmake>=3.26.4,!=3.30.0",
     "cuda-python",
     "cuvs==25.2.*,>=0.0.0a0",
     "cython>=3.0.0",
+    "librmm==25.2.*,>=0.0.0a0",
     "ninja",
     "pylibraft==25.2.*,>=0.0.0a0",
     "rmm==25.2.*,>=0.0.0a0",
diff --git a/python/libcuml/CMakeLists.txt b/python/libcuml/CMakeLists.txt
new file mode 100644
index 0000000000..7e2c1903aa
--- /dev/null
+++ b/python/libcuml/CMakeLists.txt
@@ -0,0 +1,109 @@
+# =============================================================================
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
+
+include(../../rapids_config.cmake)
+
+project(
+  libcuml-python
+  VERSION "${RAPIDS_VERSION}"
+  LANGUAGES CXX
+)
+
+################################################################################
+# - User Options  --------------------------------------------------------------
+option(CUML_UNIVERSAL "Build all cuML Python components." ON)
+option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF)
+option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF)
+option(USE_CUVS_WHEEL "Use the cuVS wheel" OFF)
+
+# todo: use CMAKE_MESSAGE_CONTEXT for prefix for logging.
+# https://github.com/rapidsai/cuml/issues/4843
+message(VERBOSE "CUML_PY: Disabling all mnmg components and comms libraries: ${SINGLEGPU}")
+message(VERBOSE "CUML_PY: Use the CUDA math wheels instead of the system libraries: ${USE_CUDA_MATH_WHEELS}")
+message(VERBOSE "CUML_PY: Use the cuVS wheel: ${USE_CUVS_WHEEL}")
+
+set(CUML_ALGORITHMS "ALL" CACHE STRING "Choose which algorithms are built cuML. Can specify individual algorithms or groups in a semicolon-separated list.")
+
+set(CUML_CPP_TARGET "cuml++")
+set(CUML_CPP_SRC "../../cpp")
+
+################################################################################
+# - Process User Options  ------------------------------------------------------
+
+# Check if cuml is already available. If so, it is the user's responsibility to ensure that the
+# CMake package is also available at build time of the Python cuml package.
+find_package(cuml "${RAPIDS_VERSION}")
+
+if(cuml_FOUND)
+  return()
+endif()
+
+unset(cuml_FOUND)
+
+find_package(CUDAToolkit REQUIRED)
+
+set(BUILD_CUML_TESTS OFF)
+set(BUILD_PRIMS_TESTS OFF)
+set(BUILD_CUML_C_LIBRARY OFF)
+set(BUILD_CUML_EXAMPLES OFF)
+set(BUILD_CUML_BENCH OFF)
+set(CUML_EXPORT_TREELITE_LINKAGE ON)
+set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
+set(CUML_RAFT_CLONE_ON_PIN OFF)
+
+# Statically link dependencies if building wheels
+set(CUDA_STATIC_RUNTIME ON)
+set(CUML_USE_CUVS_STATIC ON)
+set(CUML_USE_TREELITE_STATIC ON)
+set(CUML_USE_CUMLPRIMS_MG_STATIC ON)
+
+# Don't install the static libs into wheels
+set(CUML_EXCLUDE_RAFT_FROM_ALL ON)
+# TODO(jameslamb): should treelite just be included here?
+set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
+# TODO(jameslamb): figure out how cumlprims_mg should work (re-download in cuml? only install/export the headers in libcuml?)
+set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL OFF)
+
+# Link to the CUDA wheels with shared libraries for CUDA 12+
+if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0)
+  set(CUDA_STATIC_MATH_LIBRARIES OFF)
+else()
+  if(USE_CUDA_MATH_WHEELS)
+    message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0")
+  endif()
+  set(CUDA_STATIC_MATH_LIBRARIES ON)
+endif()
+
+# TODO(jameslamb): keep cuvs files (like lib64/libcuvs.{a,so}) out of the wheel
+# TODO(jameslamb): audit all file contents
+add_subdirectory(../../cpp cuml-cpp)
+
+if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS)
+  set(rpaths
+    "$ORIGIN/../nvidia/cublas/lib"
+    "$ORIGIN/../nvidia/cufft/lib"
+    "$ORIGIN/../nvidia/curand/lib"
+    "$ORIGIN/../nvidia/cusolver/lib"
+    "$ORIGIN/../nvidia/cusparse/lib"
+    "$ORIGIN/../nvidia/nvjitlink/lib"
+  )
+  set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH ${rpaths} APPEND)
+endif()
+
+if(USE_CUVS_WHEEL)
+  set(rpaths "$ORIGIN/../cuvs")
+  set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH ${rpaths} APPEND)
+endif()
diff --git a/python/libcuml/LICENSE b/python/libcuml/LICENSE
new file mode 120000
index 0000000000..30cff7403d
--- /dev/null
+++ b/python/libcuml/LICENSE
@@ -0,0 +1 @@
+../../LICENSE
\ No newline at end of file
diff --git a/python/libcuml/README.md b/python/libcuml/README.md
new file mode 120000
index 0000000000..fe84005413
--- /dev/null
+++ b/python/libcuml/README.md
@@ -0,0 +1 @@
+../../README.md
\ No newline at end of file
diff --git a/python/libcuml/libcuml/VERSION b/python/libcuml/libcuml/VERSION
new file mode 120000
index 0000000000..d62dc733ef
--- /dev/null
+++ b/python/libcuml/libcuml/VERSION
@@ -0,0 +1 @@
+../../../VERSION
\ No newline at end of file
diff --git a/python/libcuml/libcuml/__init__.py b/python/libcuml/libcuml/__init__.py
new file mode 100644
index 0000000000..d86d2f1066
--- /dev/null
+++ b/python/libcuml/libcuml/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from libcuml._version import __git_commit__, __version__
+from libcuml.load import load_library
diff --git a/python/libcuml/libcuml/_version.py b/python/libcuml/libcuml/_version.py
new file mode 100644
index 0000000000..b347d590c5
--- /dev/null
+++ b/python/libcuml/libcuml/_version.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import importlib.resources
+
+__version__ = (
+    importlib.resources.files(__package__).joinpath("VERSION").read_text().strip()
+)
+try:
+    __git_commit__ = (
+        importlib.resources.files(__package__)
+        .joinpath("GIT_COMMIT")
+        .read_text()
+        .strip()
+    )
+except FileNotFoundError:
+    __git_commit__ = ""
+
+__all__ = ["__git_commit__", "__version__"]
diff --git a/python/libcuml/libcuml/load.py b/python/libcuml/libcuml/load.py
new file mode 100644
index 0000000000..729abd7fef
--- /dev/null
+++ b/python/libcuml/libcuml/load.py
@@ -0,0 +1,84 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import ctypes
+import os
+
+# Loading with RTLD_LOCAL adds the library itself to the loader's
+# loaded library cache without loading any symbols into the global
+# namespace. This allows libraries that express a dependency on
+# this library to be loaded later and successfully satisfy this dependency
+# without polluting the global symbol table with symbols from
+# libcuml that could conflict with symbols from other DSOs.
+PREFERRED_LOAD_FLAG = ctypes.RTLD_LOCAL
+
+
+def _load_system_installation(soname: str):
+    """Try to dlopen() the library indicated by ``soname``.
+    Raises ``OSError`` if the library cannot be loaded.
+    """
+    return ctypes.CDLL(soname, PREFERRED_LOAD_FLAG)
+
+
+def _load_wheel_installation(soname: str):
+    """Try to dlopen() ``soname`` from this package's ``lib64`` directory.
+
+    Returns ``None`` if the file is absent; may raise ``OSError`` otherwise.
+    """
+    if os.path.isfile(
+        lib := os.path.join(os.path.dirname(__file__), "lib64", soname)
+    ):
+        return ctypes.CDLL(lib, PREFERRED_LOAD_FLAG)
+    return None
+
+
+def load_library():
+    """Dynamically load libcuml++.so and its dependencies.
+
+    Returns the ``ctypes.CDLL`` handle, or ``None`` if loading failed.
+    """
+    prefer_system_installation = (
+        os.getenv("RAPIDS_LIBCUML_PREFER_SYSTEM_LIBRARY", "false").lower()
+        != "false"
+    )
+
+    soname = "libcuml++.so"
+    libcuml_lib = None
+    if prefer_system_installation:
+        # Prefer a system library if one is present to
+        # avoid clobbering symbols that other packages might expect, but if no
+        # other library is present use the one in the wheel.
+        try:
+            libcuml_lib = _load_system_installation(soname)
+        except OSError:
+            libcuml_lib = _load_wheel_installation(soname)
+    else:
+        # Prefer the libraries bundled in this package. If they aren't found
+        # (which might be the case in builds where the library was prebuilt before
+        # packaging the wheel), look for a system installation.
+        try:
+            libcuml_lib = _load_wheel_installation(soname)
+            if libcuml_lib is None:
+                libcuml_lib = _load_system_installation(soname)
+        except OSError:
+            # If none of the searches above succeed, just silently return None
+            # and rely on other mechanisms (like RPATHs on other DSOs) to
+            # help the loader find the library.
+            pass
+
+    # The caller almost never needs to do anything with this library, but no
+    # harm in offering the option since this object at least provides a handle
+    # to inspect where libcuml was loaded from.
+    return libcuml_lib
diff --git a/python/libcuml/pyproject.toml b/python/libcuml/pyproject.toml
new file mode 100644
index 0000000000..7461b735f5
--- /dev/null
+++ b/python/libcuml/pyproject.toml
@@ -0,0 +1,90 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+[build-system]
+build-backend = "rapids_build_backend.build"
+requires = [
+    "rapids-build-backend>=0.3.0,<0.4.0.dev0",
+    "scikit-build-core[pyproject]>=0.10.0",
+] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+
+[project]
+name = "libcuml"
+dynamic = ["version"]
+description = "cuML - RAPIDS ML Algorithms (C++)"
+readme = { file = "README.md", content-type = "text/markdown" }
+authors = [
+    { name = "NVIDIA Corporation" },
+]
+license = { text = "Apache 2.0" }
+requires-python = ">=3.10"
+classifiers = [
+    "Intended Audience :: Developers",
+    "Topic :: Scientific/Engineering",
+    "License :: OSI Approved :: Apache Software License",
+    "Programming Language :: C++",
+    "Environment :: GPU :: NVIDIA CUDA",
+]
+dependencies = [
+    "nvidia-cublas",
+    "nvidia-cufft",
+    "nvidia-curand",
+    "nvidia-cusolver",
+    "nvidia-cusparse",
+] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+
+[project.urls]
+Homepage = "https://github.com/rapidsai/cuml"
+
+[project.entry-points."cmake.prefix"]
+libcuml = "libcuml"
+
+[tool.pydistcheck]
+select = [
+    # NOTE: size threshold is managed via CLI args in CI scripts
+    "distro-too-large-compressed",
+]
+
+[tool.scikit-build]
+build-dir = "build/{wheel_tag}"
+cmake.build-type = "Release"
+cmake.version = "CMakeLists.txt"
+minimum-version = "build-system.requires"
+ninja.make-fallback = true
+sdist.reproducible = true
+wheel.packages = ["libcuml"]
+wheel.install-dir = "libcuml"
+wheel.py-api = "py3"
+
+[tool.scikit-build.metadata.version]
+provider = "scikit_build_core.metadata.regex"
+input = "libcuml/VERSION"
+regex = "(?P<value>.*)"
+
+[tool.rapids-build-backend]
+build-backend = "scikit_build_core.build"
+dependencies-file = "../../dependencies.yaml"
+matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true"
+requires = [
+    "cmake>=3.26.4,!=3.30.0",
+    "cuda-python",
+    "cuvs==25.2.*,>=0.0.0a0",
+    "cython>=3.0.0",
+    "libraft==25.2.*,>=0.0.0a0",
+    "librmm==25.2.*,>=0.0.0a0",
+    "ninja",
+    "pylibraft==25.2.*,>=0.0.0a0",
+    "rmm==25.2.*,>=0.0.0a0",
+    "treelite==4.3.0",
+] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.

From 13db0a99a79c9ae656171e73859dcf91f4260933 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 30 Dec 2024 15:23:07 -0800
Subject: [PATCH 02/48] remove uses of ASSOCIATED_TARGETS, lots of other
 refactoring

---
 ci/build_wheel_cuml.sh                        | 31 ++++++-------------
 dependencies.yaml                             | 12 +++++--
 python/cuml/CMakeLists.txt                    | 10 ++++--
 python/cuml/cuml/cluster/CMakeLists.txt       |  1 -
 .../cuml/cuml/cluster/hdbscan/CMakeLists.txt  |  1 -
 python/cuml/cuml/common/CMakeLists.txt        |  1 -
 python/cuml/cuml/datasets/CMakeLists.txt      |  1 -
 python/cuml/cuml/decomposition/CMakeLists.txt |  1 -
 python/cuml/cuml/ensemble/CMakeLists.txt      |  1 -
 .../cuml/cuml/experimental/fil/CMakeLists.txt |  1 -
 .../experimental/linear_model/CMakeLists.txt  |  1 -
 python/cuml/cuml/explainer/CMakeLists.txt     |  1 -
 python/cuml/cuml/fil/CMakeLists.txt           |  1 -
 python/cuml/cuml/internals/CMakeLists.txt     |  1 -
 python/cuml/cuml/kernel_ridge/CMakeLists.txt  |  1 -
 python/cuml/cuml/linear_model/CMakeLists.txt  |  1 -
 python/cuml/cuml/manifold/CMakeLists.txt      |  1 -
 python/cuml/cuml/metrics/CMakeLists.txt       |  1 -
 .../cuml/cuml/metrics/cluster/CMakeLists.txt  |  1 -
 python/cuml/cuml/neighbors/CMakeLists.txt     |  1 -
 .../cuml/random_projection/CMakeLists.txt     |  1 -
 python/cuml/cuml/solvers/CMakeLists.txt       |  1 -
 python/cuml/cuml/svm/CMakeLists.txt           |  1 -
 python/cuml/cuml/tsa/CMakeLists.txt           |  1 -
 python/libcuml/CMakeLists.txt                 | 14 ++++++---
 python/libcuml/libcuml/load.py                |  6 +++-
 26 files changed, 42 insertions(+), 52 deletions(-)

diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index c6375bee54..1969aa9315 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -3,17 +3,11 @@
 
 set -euo pipefail
 
+package_name="cuml"
 package_dir="python/cuml"
 
-source rapids-configure-sccache
-source rapids-date-string
-
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
-rapids-generate-version > ./VERSION
-
-cd ${package_dir}
-
 # TODO(jameslamb): split this out into build_wheel_{cuml,libcuml}.sh
 # TODO(jameslamb): add libcuml++.so to cuml exclusions
 case "${RAPIDS_CUDA_VERSION}" in
@@ -38,20 +32,15 @@ case "${RAPIDS_CUDA_VERSION}" in
     ;;
 esac
 
-sccache --zero-stats
-
-SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS}" \
-  python -m pip wheel . \
-    -w dist \
-    -v \
-    --no-deps \
-    --disable-pip-version-check
-
-sccache --show-adv-stats
+export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS}"
+./ci/build_wheel.sh "${package_name}" "${package_dir}"
 
-mkdir -p final_dist
-python -m auditwheel repair -w final_dist "${EXCLUDE_ARGS[@]}" dist/*
+mkdir -p ${package_dir}/final_dist
+python -m auditwheel repair \
+    "${EXCLUDE_ARGS[@]}" \
+    -w ${package_dir}/final_dist \
+    ${package_dir}/dist/*
 
-../../ci/validate_wheel.sh final_dist
+./ci/validate_wheel.sh ${package_dir} final_dist
 
-RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python final_dist
+RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python "${package_dir}/final_dist"
diff --git a/dependencies.yaml b/dependencies.yaml
index d65e45e586..3887f7295e 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -10,6 +10,7 @@ files:
       - cuda
       - cuda_version
       - depends_on_librmm
+      - depends_on_treelite
       - docs
       - py_build
       - py_run
@@ -83,6 +84,8 @@ files:
     includes:
       - common_build
       - depends_on_librmm
+      # TODO(jameslamb): does cuml really need treelite at build time?
+      - depends_on_treelite
       - py_build
   py_run_cuml:
     output: pyproject
@@ -92,6 +95,7 @@ files:
     includes:
       - cuda_wheels
       - depends_on_libcuml
+      - depends_on_treelite
       - py_run
   py_test_cuml:
     output: pyproject
@@ -118,6 +122,7 @@ files:
       - common_build
       - depends_on_libraft
       - depends_on_librmm
+      - depends_on_treelite
       - py_build
   py_run_libcuml:
     output: pyproject
@@ -208,7 +213,6 @@ dependencies:
       - output_types: [conda, requirements, pyproject]
         packages:
           - &cython cython>=3.0.0
-          - &treelite treelite==4.3.0
       - output_types: conda
         packages:
           - &cuvs_unsuffixed cuvs==25.2.*,>=0.0.0a0
@@ -270,7 +274,6 @@ dependencies:
           - scipy>=1.8.0
           - packaging
           - rapids-dask-dependency==25.2.*,>=0.0.0a0
-          - *treelite
       - output_types: conda
         packages:
           - &cudf_unsuffixed cudf==25.2.*,>=0.0.0a0
@@ -640,3 +643,8 @@ dependencies:
             packages:
               - librmm-cu11==25.2.*,>=0.0.0a0
           - {matrix: null, packages: [*librmm_unsuffixed]}
+  depends_on_treelite:
+    common:
+      - output_types: [conda, requirements, pyproject]
+        packages:
+          - treelite==4.3.0
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 34fabd2d9d..46f897fc62 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -64,6 +64,14 @@ set(CUML_CPP_SRC "../../cpp")
 include(rapids-cpm)
 include(rapids-export)
 rapids_cpm_init()
+
+set(CUML_EXPORT_TREELITE_LINKAGE ON)
+set(CUML_PYTHON_TREELITE_TARGET treelite::treelite)
+# use dynamic treelite (provided by treelite wheels)
+# TODO(jameslamb): does this make sense?
+set(CUML_USE_TREELITE_STATIC OFF)
+# TODO(jameslamb): should treelite just be included here?
+set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
 include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
 
 find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
@@ -83,8 +91,6 @@ find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
 
 include(rapids-cython-core)
 
-set(CUML_PYTHON_TREELITE_TARGET treelite::treelite)
-
 # cuml-cpu does not need libcuml++.so
 if(NOT CUML_CPU)
   # (moved to libcuml/CMakeLists.txt)
diff --git a/python/cuml/cuml/cluster/CMakeLists.txt b/python/cuml/cuml/cluster/CMakeLists.txt
index 43d15ae6fa..f06180d1d4 100644
--- a/python/cuml/cuml/cluster/CMakeLists.txt
+++ b/python/cuml/cuml/cluster/CMakeLists.txt
@@ -31,5 +31,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_mg_libraries}"
   MODULE_PREFIX cluster_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/cluster/hdbscan/CMakeLists.txt b/python/cuml/cuml/cluster/hdbscan/CMakeLists.txt
index 2c4b41909d..1c093e5f10 100644
--- a/python/cuml/cuml/cluster/hdbscan/CMakeLists.txt
+++ b/python/cuml/cuml/cluster/hdbscan/CMakeLists.txt
@@ -21,5 +21,4 @@ rapids_cython_create_modules(
         SOURCE_FILES "${cython_sources}"
         LINKED_LIBRARIES "${cuml_sg_libraries}"
         MODULE_PREFIX cluster_hdbscan_
-        ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/common/CMakeLists.txt b/python/cuml/cuml/common/CMakeLists.txt
index 1492dcd46a..146f94ab6d 100644
--- a/python/cuml/cuml/common/CMakeLists.txt
+++ b/python/cuml/cuml/common/CMakeLists.txt
@@ -28,7 +28,6 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_mg_libraries}"
   MODULE_PREFIX common_
-  ASSOCIATED_TARGETS cuml
 )
 
 if(${CUML_UNIVERSAL})
diff --git a/python/cuml/cuml/datasets/CMakeLists.txt b/python/cuml/cuml/datasets/CMakeLists.txt
index 51d6614600..50813b60d1 100644
--- a/python/cuml/cuml/datasets/CMakeLists.txt
+++ b/python/cuml/cuml/datasets/CMakeLists.txt
@@ -21,5 +21,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX datasets_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/decomposition/CMakeLists.txt b/python/cuml/cuml/decomposition/CMakeLists.txt
index 2552c80d74..7c5bfa39f5 100644
--- a/python/cuml/cuml/decomposition/CMakeLists.txt
+++ b/python/cuml/cuml/decomposition/CMakeLists.txt
@@ -30,5 +30,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_mg_libraries}"
   MODULE_PREFIX decomposition_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/ensemble/CMakeLists.txt b/python/cuml/cuml/ensemble/CMakeLists.txt
index e3732c1577..8a3833a3e7 100644
--- a/python/cuml/cuml/ensemble/CMakeLists.txt
+++ b/python/cuml/cuml/ensemble/CMakeLists.txt
@@ -27,5 +27,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}"
   MODULE_PREFIX ensemble_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/experimental/fil/CMakeLists.txt b/python/cuml/cuml/experimental/fil/CMakeLists.txt
index f558a47620..21ed8a2d5e 100644
--- a/python/cuml/cuml/experimental/fil/CMakeLists.txt
+++ b/python/cuml/cuml/experimental/fil/CMakeLists.txt
@@ -26,5 +26,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}"
   MODULE_PREFIX experimental_fil_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/experimental/linear_model/CMakeLists.txt b/python/cuml/cuml/experimental/linear_model/CMakeLists.txt
index 6c52c3cd16..3d500ef382 100644
--- a/python/cuml/cuml/experimental/linear_model/CMakeLists.txt
+++ b/python/cuml/cuml/experimental/linear_model/CMakeLists.txt
@@ -22,5 +22,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX experimental_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/explainer/CMakeLists.txt b/python/cuml/cuml/explainer/CMakeLists.txt
index e982fb1264..c1e4a36cf3 100644
--- a/python/cuml/cuml/explainer/CMakeLists.txt
+++ b/python/cuml/cuml/explainer/CMakeLists.txt
@@ -28,5 +28,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}"
   MODULE_PREFIX explainer_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/fil/CMakeLists.txt b/python/cuml/cuml/fil/CMakeLists.txt
index 54e2df2cd4..abb76e9450 100644
--- a/python/cuml/cuml/fil/CMakeLists.txt
+++ b/python/cuml/cuml/fil/CMakeLists.txt
@@ -27,5 +27,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}"
   MODULE_PREFIX fil_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/internals/CMakeLists.txt b/python/cuml/cuml/internals/CMakeLists.txt
index d4ebfdd01d..d1932d4022 100644
--- a/python/cuml/cuml/internals/CMakeLists.txt
+++ b/python/cuml/cuml/internals/CMakeLists.txt
@@ -26,7 +26,6 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX internals_
-  ASSOCIATED_TARGETS cuml
 )
 
 # We need to include for callbacks_implements.h in the internals folder
diff --git a/python/cuml/cuml/kernel_ridge/CMakeLists.txt b/python/cuml/cuml/kernel_ridge/CMakeLists.txt
index 75421a1752..81c0736577 100644
--- a/python/cuml/cuml/kernel_ridge/CMakeLists.txt
+++ b/python/cuml/cuml/kernel_ridge/CMakeLists.txt
@@ -22,5 +22,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX kernel_ridge_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/linear_model/CMakeLists.txt b/python/cuml/cuml/linear_model/CMakeLists.txt
index aa72642453..c29a4ddbab 100644
--- a/python/cuml/cuml/linear_model/CMakeLists.txt
+++ b/python/cuml/cuml/linear_model/CMakeLists.txt
@@ -37,5 +37,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_mg_libraries}"
   MODULE_PREFIX linear_model_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/manifold/CMakeLists.txt b/python/cuml/cuml/manifold/CMakeLists.txt
index 115705d9af..7a2c8cac8a 100644
--- a/python/cuml/cuml/manifold/CMakeLists.txt
+++ b/python/cuml/cuml/manifold/CMakeLists.txt
@@ -25,5 +25,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX manifold_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/metrics/CMakeLists.txt b/python/cuml/cuml/metrics/CMakeLists.txt
index 0a6e789c13..80a2757383 100644
--- a/python/cuml/cuml/metrics/CMakeLists.txt
+++ b/python/cuml/cuml/metrics/CMakeLists.txt
@@ -27,5 +27,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX metrics_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/metrics/cluster/CMakeLists.txt b/python/cuml/cuml/metrics/cluster/CMakeLists.txt
index a81708674c..8d67fb8c11 100644
--- a/python/cuml/cuml/metrics/cluster/CMakeLists.txt
+++ b/python/cuml/cuml/metrics/cluster/CMakeLists.txt
@@ -28,5 +28,4 @@ rapids_cython_create_modules(
   CXX
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/neighbors/CMakeLists.txt b/python/cuml/cuml/neighbors/CMakeLists.txt
index dbb23550aa..0a8be580d5 100644
--- a/python/cuml/cuml/neighbors/CMakeLists.txt
+++ b/python/cuml/cuml/neighbors/CMakeLists.txt
@@ -32,5 +32,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_mg_libraries}"
   MODULE_PREFIX neighbors_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/random_projection/CMakeLists.txt b/python/cuml/cuml/random_projection/CMakeLists.txt
index f4e54397ac..649dbbf774 100644
--- a/python/cuml/cuml/random_projection/CMakeLists.txt
+++ b/python/cuml/cuml/random_projection/CMakeLists.txt
@@ -22,5 +22,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX random_projection_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/solvers/CMakeLists.txt b/python/cuml/cuml/solvers/CMakeLists.txt
index a6eada58a9..23599a9241 100644
--- a/python/cuml/cuml/solvers/CMakeLists.txt
+++ b/python/cuml/cuml/solvers/CMakeLists.txt
@@ -31,5 +31,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_mg_libraries}"
   MODULE_PREFIX solvers_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/svm/CMakeLists.txt b/python/cuml/cuml/svm/CMakeLists.txt
index 3b9ab0e199..113b7b1430 100644
--- a/python/cuml/cuml/svm/CMakeLists.txt
+++ b/python/cuml/cuml/svm/CMakeLists.txt
@@ -25,5 +25,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX svm_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/cuml/cuml/tsa/CMakeLists.txt b/python/cuml/cuml/tsa/CMakeLists.txt
index 3cbe54bded..208e1a9cf0 100644
--- a/python/cuml/cuml/tsa/CMakeLists.txt
+++ b/python/cuml/cuml/tsa/CMakeLists.txt
@@ -27,5 +27,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${cuml_sg_libraries}"
   MODULE_PREFIX tsa_
-  ASSOCIATED_TARGETS cuml
 )
diff --git a/python/libcuml/CMakeLists.txt b/python/libcuml/CMakeLists.txt
index 7e2c1903aa..30c6e11649 100644
--- a/python/libcuml/CMakeLists.txt
+++ b/python/libcuml/CMakeLists.txt
@@ -60,20 +60,24 @@ set(BUILD_PRIMS_TESTS OFF)
 set(BUILD_CUML_C_LIBRARY OFF)
 set(BUILD_CUML_EXAMPLES OFF)
 set(BUILD_CUML_BENCH OFF)
-set(CUML_EXPORT_TREELITE_LINKAGE ON)
-set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
 set(CUML_RAFT_CLONE_ON_PIN OFF)
 
+# --- treelite --- #
+set(CUML_EXPORT_TREELITE_LINKAGE ON)
+set(CUML_PYTHON_TREELITE_TARGET treelite::treelite)
+# use dynamic treelite (provided by treelite wheels)
+# TODO(jameslamb): does this make sense?
+set(CUML_USE_TREELITE_STATIC OFF)
+# TODO(jameslamb): should treelite just be included here?
+set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
+
 # Statically link dependencies if building wheels
 set(CUDA_STATIC_RUNTIME ON)
 set(CUML_USE_CUVS_STATIC ON)
-set(CUML_USE_TREELITE_STATIC ON)
 set(CUML_USE_CUMLPRIMS_MG_STATIC ON)
 
 # Don't install the static libs into wheels
 set(CUML_EXCLUDE_RAFT_FROM_ALL ON)
-# TODO(jameslamb): should treelite just be included here?
-set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
 # TODO(jameslamb): figure out how cumlprims_mg should work (re-download in cuml? only install/export the headers in libcuml?)
 set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL OFF)
 
diff --git a/python/libcuml/libcuml/load.py b/python/libcuml/libcuml/load.py
index 729abd7fef..5e72d7ec6f 100644
--- a/python/libcuml/libcuml/load.py
+++ b/python/libcuml/libcuml/load.py
@@ -46,6 +46,10 @@ def _load_wheel_installation(soname: str):
 
 def load_library():
     """Dynamically load libcuml++.so and its dependencies"""
+    # treelite must be loaded before libcuml++ because libcuml++
+    # references its symbols
+    import treelite
+
     prefer_system_installation = (
         os.getenv("RAPIDS_LIBCUML_PREFER_SYSTEM_LIBRARY", "false").lower()
         != "false"
@@ -81,4 +85,4 @@ def load_library():
     # to inspect where libcuml was loaded from.
 
     # TODO(jameslamb): return something here?
-    # return libcugraph_lib
+    return libcuml_lib

From 368faea5fd452284d72ec1ef65859f96070db981 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Tue, 31 Dec 2024 08:01:38 -0800
Subject: [PATCH 03/48] more changes

---
 .github/workflows/build.yaml  | 21 +++++++++++++++++++++
 .github/workflows/pr.yaml     | 15 ++++++++++++++-
 build.sh                      |  5 -----
 python/cuml/CMakeLists.txt    | 15 ---------------
 python/libcuml/CMakeLists.txt |  2 --
 5 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 9a4b614584..2342af477b 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -66,6 +66,27 @@ jobs:
       branch: ${{ inputs.branch }}
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
+  wheel-build-libcuml:
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
+    with:
+      build_type: ${{ inputs.build_type || 'branch' }}
+      branch: ${{ inputs.branch }}
+      sha: ${{ inputs.sha }}
+      date: ${{ inputs.date }}
+      script: ci/build_wheel_libcuml.sh
+      # build for every combination of arch and CUDA version, but only for the latest Python
+      matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
+  wheel-publish-libcuml:
+    needs: wheel-build-libcuml
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02
+    with:
+      build_type: ${{ inputs.build_type || 'branch' }}
+      branch: ${{ inputs.branch }}
+      sha: ${{ inputs.sha }}
+      date: ${{ inputs.date }}
+      package-name: libcuml
   wheel-build-cuml:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 95d648f8e6..abc54e424a 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -25,6 +25,7 @@ jobs:
       - conda-notebook-tests
       - docs-build
       - telemetry-setup
+      - wheel-build-libcuml
       - wheel-build-cuml
       - wheel-tests-cuml
       - devcontainer
@@ -166,10 +167,22 @@ jobs:
       arch: "amd64"
       container_image: "rapidsai/ci-conda:latest"
       run_script: "ci/build_docs.sh"
-  wheel-build-cuml:
+  wheel-build-libcuml:
     needs: checks
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
+    with:
+      build_type: pull-request
+      branch: ${{ inputs.branch }}
+      sha: ${{ inputs.sha }}
+      date: ${{ inputs.date }}
+      script: ci/build_wheel_libcuml.sh
+      # build for every combination of arch and CUDA version, but only for the latest Python
+      matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
+  wheel-build-cuml:
+    needs: [checks, wheel-build-libcuml]
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
     with:
       build_type: pull-request
       script: ci/build_wheel.sh
diff --git a/build.sh b/build.sh
index 9eb36f103c..03c44c033f 100755
--- a/build.sh
+++ b/build.sh
@@ -288,11 +288,6 @@ if (! hasArg --configure-only) && (completeBuild || hasArg cuml || hasArg pydocs
     # Replace spaces with semicolons in SKBUILD_EXTRA_CMAKE_ARGS
     SKBUILD_EXTRA_CMAKE_ARGS=$(echo ${SKBUILD_EXTRA_CMAKE_ARGS} | sed 's/ /;/g')
 
-    # Append `-DFIND_CUML_CPP=ON` to CUML_EXTRA_CMAKE_ARGS unless a user specified the option.
-    if [[ "${SKBUILD_EXTRA_CMAKE_ARGS}" != *"DFIND_CUML_CPP"* ]]; then
-        SKBUILD_EXTRA_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS};-DFIND_CUML_CPP=ON"
-    fi
-
     SKBUILD_CMAKE_ARGS="-DCMAKE_MESSAGE_LOG_LEVEL=${CMAKE_LOG_LEVEL};${SKBUILD_EXTRA_CMAKE_ARGS}" \
         python -m pip install --no-build-isolation --no-deps --config-settings rapidsai.disable-cuda=true ${REPODIR}/python/cuml
 
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 46f897fc62..92cb410ae8 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -36,7 +36,6 @@ project(
 ################################################################################
 # - User Options  --------------------------------------------------------------
 option(CUML_UNIVERSAL "Build all cuML Python components." ON)
-option(FIND_CUML_CPP "Search for existing CUML C++ installations before defaulting to local files" OFF)
 option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF)
 option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF)
 option(USE_CUVS_WHEEL "Use the cuVS wheel" OFF)
@@ -45,7 +44,6 @@ set(CUML_RAFT_CLONE_ON_PIN OFF)
 # todo: use CMAKE_MESSAGE_CONTEXT for prefix for logging.
 # https://github.com/rapidsai/cuml/issues/4843
 message(VERBOSE "CUML_PY: Build only cuML CPU Python components.: ${CUML_CPU}")
-message(VERBOSE "CUML_PY: Searching for existing CUML C++ installations before defaulting to local files: ${FIND_CUML_CPP}")
 message(VERBOSE "CUML_PY: Disabling all mnmg components and comms libraries: ${SINGLEGPU}")
 
 set(CUML_ALGORITHMS "ALL" CACHE STRING "Choose which algorithms are built cuML. Can specify individual algorithms or groups in a semicolon-separated list.")
@@ -76,19 +74,6 @@ include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
 
 find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
 
-# # If the user requested it, we attempt to find cuml.
-# if(FIND_CUML_CPP)
-#   # We need to call get_treelite explicitly because we need the correct
-#   # ${TREELITE_LIBS} definition for RF
-#   include(rapids-cpm)
-#   include(rapids-export)
-#   rapids_cpm_init()
-#   find_package(cuml ${CUML_VERSION} REQUIRED)
-#   include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
-# else()
-#   set(cuml_FOUND OFF)
-# endif()
-
 include(rapids-cython-core)
 
 # cuml-cpu does not need libcuml++.so
diff --git a/python/libcuml/CMakeLists.txt b/python/libcuml/CMakeLists.txt
index 30c6e11649..5e69571ee0 100644
--- a/python/libcuml/CMakeLists.txt
+++ b/python/libcuml/CMakeLists.txt
@@ -31,8 +31,6 @@ option(USE_CUVS_WHEEL "Use the cuVS wheel" OFF)
 
 # todo: use CMAKE_MESSAGE_CONTEXT for prefix for logging.
 # https://github.com/rapidsai/cuml/issues/4843
-message(VERBOSE "CUML_PY: Build only cuML CPU Python components.: ${CUML_CPU}")
-message(VERBOSE "CUML_PY: Searching for existing CUML C++ installations before defaulting to local files: ${FIND_CUML_CPP}")
 message(VERBOSE "CUML_PY: Disabling all mnmg components and comms libraries: ${SINGLEGPU}")
 
 set(CUML_ALGORITHMS "ALL" CACHE STRING "Choose which algorithms are built cuML. Can specify individual algorithms or groups in a semicolon-separated list.")

From e99a2b7635a0897aefbbb003e7196a97633b1807 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Tue, 31 Dec 2024 12:37:21 -0800
Subject: [PATCH 04/48] use static treelite

---
 .gitignore                    | 1 +
 python/cuml/CMakeLists.txt    | 7 +++----
 python/libcuml/CMakeLists.txt | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/.gitignore b/.gitignore
index dc0b98d735..61e8202c15 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,7 @@ tmp/
 .hypothesis
 wheels/
 wheelhouse/
+raft_log.txt
 _skbuild/
 
 ## files pickled in notebook when ran during python docstring generation
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 92cb410ae8..4d27bfcd5e 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -64,10 +64,9 @@ include(rapids-export)
 rapids_cpm_init()
 
 set(CUML_EXPORT_TREELITE_LINKAGE ON)
-set(CUML_PYTHON_TREELITE_TARGET treelite::treelite)
-# use dynamic treelite (provided by treelite wheels)
-# TODO(jameslamb): does this make sense?
-set(CUML_USE_TREELITE_STATIC OFF)
+set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
+# TODO(jameslamb): is it safe for libcuml and cuml to both use their own static treelite?
+set(CUML_USE_TREELITE_STATIC ON)
 # TODO(jameslamb): should treelite just be included here?
 set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
 include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
diff --git a/python/libcuml/CMakeLists.txt b/python/libcuml/CMakeLists.txt
index 5e69571ee0..a6a96b1ecd 100644
--- a/python/libcuml/CMakeLists.txt
+++ b/python/libcuml/CMakeLists.txt
@@ -62,10 +62,10 @@ set(CUML_RAFT_CLONE_ON_PIN OFF)
 
 # --- treelite --- #
 set(CUML_EXPORT_TREELITE_LINKAGE ON)
-set(CUML_PYTHON_TREELITE_TARGET treelite::treelite)
+set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
-# use dynamic treelite (provided by treelite wheels)
+# use static treelite (treelite::treelite_static), not the shared library from treelite wheels
-# TODO(jameslamb): does this make sense?
-set(CUML_USE_TREELITE_STATIC OFF)
+# TODO(jameslamb): is it safe for libcuml and cuml to both use their own static treelite?
+set(CUML_USE_TREELITE_STATIC ON)
 # TODO(jameslamb): should treelite just be included here?
 set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
 

From 8082c45211d9992b4a68dd7f2f8d9e15323034b1 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Tue, 31 Dec 2024 13:07:13 -0800
Subject: [PATCH 05/48] one more merge conflict

---
 python/cuml/CMakeLists.txt | 56 --------------------------------------
 1 file changed, 56 deletions(-)

diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 01a4e74d14..4d27bfcd5e 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -77,63 +77,7 @@ include(rapids-cython-core)
 
 # cuml-cpu does not need libcuml++.so
 if(NOT CUML_CPU)
-<<<<<<< HEAD
   # (moved to libcuml/CMakeLists.txt)
-=======
-  if(NOT cuml_FOUND)
-    find_package(CUDAToolkit REQUIRED)
-
-    set(BUILD_CUML_TESTS OFF)
-    set(BUILD_PRIMS_TESTS OFF)
-    set(BUILD_CUML_C_LIBRARY OFF)
-    set(BUILD_CUML_EXAMPLES OFF)
-    set(BUILD_CUML_BENCH OFF)
-    set(CUML_EXPORT_TREELITE_LINKAGE ON)
-    set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
-
-    # Statically link dependencies if building wheels
-    set(CUDA_STATIC_RUNTIME ON)
-    set(CUML_USE_CUVS_STATIC ON)
-    set(CUML_USE_TREELITE_STATIC ON)
-    set(CUML_USE_CUMLPRIMS_MG_STATIC ON)
-    # Link to the CUDA wheels with shared libraries for CUDA 12+
-    if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0)
-      set(CUDA_STATIC_MATH_LIBRARIES OFF)
-    else()
-      if(USE_CUDA_MATH_WHEELS)
-        message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0")
-      endif()
-      set(CUDA_STATIC_MATH_LIBRARIES ON)
-    endif()
-    # Don't install the static libs into wheels
-    set(CUML_EXCLUDE_RAFT_FROM_ALL ON)
-    set(RAFT_EXCLUDE_FAISS_FROM_ALL ON)
-    set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
-    set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL ON)
-
-    add_subdirectory(${CUML_CPP_SRC} cuml-cpp EXCLUDE_FROM_ALL)
-
-    if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS)
-      set(rpaths
-        "$ORIGIN/../nvidia/cublas/lib"
-        "$ORIGIN/../nvidia/cufft/lib"
-        "$ORIGIN/../nvidia/curand/lib"
-        "$ORIGIN/../nvidia/cusolver/lib"
-        "$ORIGIN/../nvidia/cusparse/lib"
-        "$ORIGIN/../nvidia/nvjitlink/lib"
-      )
-      set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH ${rpaths} APPEND)
-    endif()
-
-    if(USE_CUVS_WHEEL)
-      set(rpaths "$ORIGIN/../cuvs")
-      set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH ${rpaths} APPEND)
-    endif()
-
-    set(cython_lib_dir cuml)
-    install(TARGETS ${CUML_CPP_TARGET} DESTINATION ${cython_lib_dir})
-  endif()
->>>>>>> branch-25.02
 endif()
 
 if(CUML_CPU)

From 8434d7733769c567bad313303d03e0b50ffb605e Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Thu, 2 Jan 2025 08:33:35 -0800
Subject: [PATCH 06/48] more fiddling

---
 python/cuml/CMakeLists.txt                    | 45 ++++++++++++++++++-
 python/cuml/cuml/__init__.py                  |  2 +-
 python/cuml/cuml/cluster/CMakeLists.txt       |  2 +-
 .../cuml/cuml/cluster/hdbscan/CMakeLists.txt  |  2 +-
 python/cuml/cuml/common/CMakeLists.txt        |  2 +-
 python/cuml/cuml/datasets/CMakeLists.txt      |  2 +-
 python/cuml/cuml/decomposition/CMakeLists.txt |  2 +-
 python/cuml/cuml/ensemble/CMakeLists.txt      |  2 +-
 .../cuml/cuml/experimental/fil/CMakeLists.txt |  2 +-
 .../experimental/linear_model/CMakeLists.txt  |  2 +-
 python/cuml/cuml/explainer/CMakeLists.txt     |  2 +-
 python/cuml/cuml/fil/CMakeLists.txt           |  2 +-
 python/cuml/cuml/internals/CMakeLists.txt     |  2 +-
 python/cuml/cuml/kernel_ridge/CMakeLists.txt  |  2 +-
 python/cuml/cuml/linear_model/CMakeLists.txt  |  2 +-
 python/cuml/cuml/manifold/CMakeLists.txt      |  2 +-
 python/cuml/cuml/metrics/CMakeLists.txt       |  2 +-
 .../cuml/cuml/metrics/cluster/CMakeLists.txt  |  2 +-
 python/cuml/cuml/neighbors/CMakeLists.txt     |  2 +-
 .../cuml/random_projection/CMakeLists.txt     |  2 +-
 python/cuml/cuml/solvers/CMakeLists.txt       |  2 +-
 python/cuml/cuml/svm/CMakeLists.txt           |  2 +-
 python/cuml/cuml/tsa/CMakeLists.txt           |  2 +-
 python/cuml/pyproject.toml                    |  1 +
 python/libcuml/CMakeLists.txt                 | 34 +++++++++-----
 python/libcuml/libcuml/__init__.py            |  2 +-
 python/libcuml/libcuml/_version.py            |  2 +-
 python/libcuml/libcuml/load.py                |  2 +-
 28 files changed, 92 insertions(+), 38 deletions(-)

diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 4d27bfcd5e..d0a782b9c0 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -37,7 +37,6 @@ project(
 # - User Options  --------------------------------------------------------------
 option(CUML_UNIVERSAL "Build all cuML Python components." ON)
 option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF)
-option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF)
 option(USE_CUVS_WHEEL "Use the cuVS wheel" OFF)
 set(CUML_RAFT_CLONE_ON_PIN OFF)
 
@@ -54,6 +53,13 @@ set(CUML_CPP_SRC "../../cpp")
 ################################################################################
 # - Process User Options  ------------------------------------------------------
 
+# --- CCCL, RAFT, and RMM --- #
+# CCCL before RMM, and RMM before RAFT
+include(cmake/thirdparty/get_cccl.cmake)
+include(cmake/thirdparty/get_rmm.cmake)
+include(cmake/thirdparty/get_raft.cmake)
+
+# --- treelite --- #
 # We need to call get_treelite explicitly because we need the correct
 # ${TREELITE_LIBS} definition for RF
 #
@@ -71,11 +77,46 @@ set(CUML_USE_TREELITE_STATIC ON)
 set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
 include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
 
+# get nlohmann_json and rapidsjson
+# TODO(jameslamb): maybe this isn't necessary?
+function(copy_interface_excludes)
+  set(_options "")
+  set(_one_value TARGET INCLUDED_TARGET)
+  set(_multi_value "")
+  cmake_parse_arguments(_CUML_INCLUDES "${_options}" "${_one_value}"
+                        "${_multi_value}" ${ARGN})
+  get_target_property(_includes ${_CUML_INCLUDES_INCLUDED_TARGET} INTERFACE_INCLUDE_DIRECTORIES)
+  target_include_directories(${_CUML_INCLUDES_TARGET} PUBLIC ${_includes})
+endfunction()
+copy_interface_excludes(INCLUDED_TARGET treelite::treelite_static TARGET ${CUML_CPP_TARGET})
+
+# --- cuvs --- #
+ 
+if(USE_CUVS_WHEEL)
+  set(CUML_USE_CUVS_STATIC OFF)
+else()
+  set(CUML_USE_CUVS_STATIC ON)
+endif()
+
+# either way, don't include any cuvs stuff in wheel
+# (expect anything building against libcuml to provide cuvs headers externally)
+set(CUML_EXCLUDE_CUVS_FROM_ALL ON)
+
+# download this again just to get the headers
+include(${CUML_CPP_SRC}/cmake/thirdparty/get_cuvs.cmake)
+
+# --- cumlprims_mg --- #
+set(CUML_USE_CUMLPRIMS_MG_STATIC ON)
+# TODO(jameslamb): figure out how cumlprims_mg should work (re-download in cuml? only install/export the headers in libcuml?)
+set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL ON)
+include(${CUML_CPP_SRC}/cmake/thirdparty/get_cumlprims_mg.cmake)
+
 find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
 
 include(rapids-cython-core)
 
 # cuml-cpu does not need libcuml++.so
+# TODO(jameslamb): maybe all the stuff above is unnecessary in the CPU-only case?
 if(NOT CUML_CPU)
   # (moved to libcuml/CMakeLists.txt)
 endif()
diff --git a/python/cuml/cuml/__init__.py b/python/cuml/cuml/__init__.py
index a536979c1c..a8557c84fc 100644
--- a/python/cuml/cuml/__init__.py
+++ b/python/cuml/cuml/__init__.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/cuml/cuml/cluster/CMakeLists.txt b/python/cuml/cuml/cluster/CMakeLists.txt
index 09b3495f1f..ac012ff510 100644
--- a/python/cuml/cuml/cluster/CMakeLists.txt
+++ b/python/cuml/cuml/cluster/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/cluster/hdbscan/CMakeLists.txt b/python/cuml/cuml/cluster/hdbscan/CMakeLists.txt
index 5c4eda2264..5a85a97b2d 100644
--- a/python/cuml/cuml/cluster/hdbscan/CMakeLists.txt
+++ b/python/cuml/cuml/cluster/hdbscan/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/common/CMakeLists.txt b/python/cuml/cuml/common/CMakeLists.txt
index 146f94ab6d..df6cee81cb 100644
--- a/python/cuml/cuml/common/CMakeLists.txt
+++ b/python/cuml/cuml/common/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/datasets/CMakeLists.txt b/python/cuml/cuml/datasets/CMakeLists.txt
index ab4e981766..64c2b483f6 100644
--- a/python/cuml/cuml/datasets/CMakeLists.txt
+++ b/python/cuml/cuml/datasets/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/decomposition/CMakeLists.txt b/python/cuml/cuml/decomposition/CMakeLists.txt
index 43a619c4c2..71f36d57af 100644
--- a/python/cuml/cuml/decomposition/CMakeLists.txt
+++ b/python/cuml/cuml/decomposition/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/ensemble/CMakeLists.txt b/python/cuml/cuml/ensemble/CMakeLists.txt
index 7e7ea7c4bb..38999083e6 100644
--- a/python/cuml/cuml/ensemble/CMakeLists.txt
+++ b/python/cuml/cuml/ensemble/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/experimental/fil/CMakeLists.txt b/python/cuml/cuml/experimental/fil/CMakeLists.txt
index 4b6ea7ce01..d2baab6642 100644
--- a/python/cuml/cuml/experimental/fil/CMakeLists.txt
+++ b/python/cuml/cuml/experimental/fil/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/experimental/linear_model/CMakeLists.txt b/python/cuml/cuml/experimental/linear_model/CMakeLists.txt
index f9cf06d7a8..eb367d9a56 100644
--- a/python/cuml/cuml/experimental/linear_model/CMakeLists.txt
+++ b/python/cuml/cuml/experimental/linear_model/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/explainer/CMakeLists.txt b/python/cuml/cuml/explainer/CMakeLists.txt
index 3ba1cca686..213bb74a78 100644
--- a/python/cuml/cuml/explainer/CMakeLists.txt
+++ b/python/cuml/cuml/explainer/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/fil/CMakeLists.txt b/python/cuml/cuml/fil/CMakeLists.txt
index a078c964dc..816e8aa7c8 100644
--- a/python/cuml/cuml/fil/CMakeLists.txt
+++ b/python/cuml/cuml/fil/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/internals/CMakeLists.txt b/python/cuml/cuml/internals/CMakeLists.txt
index a3b0cbf884..a363fb7323 100644
--- a/python/cuml/cuml/internals/CMakeLists.txt
+++ b/python/cuml/cuml/internals/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/kernel_ridge/CMakeLists.txt b/python/cuml/cuml/kernel_ridge/CMakeLists.txt
index c8064cdf3c..1bf0d0a1e0 100644
--- a/python/cuml/cuml/kernel_ridge/CMakeLists.txt
+++ b/python/cuml/cuml/kernel_ridge/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/linear_model/CMakeLists.txt b/python/cuml/cuml/linear_model/CMakeLists.txt
index f526309cb6..cfa0c3ab05 100644
--- a/python/cuml/cuml/linear_model/CMakeLists.txt
+++ b/python/cuml/cuml/linear_model/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/manifold/CMakeLists.txt b/python/cuml/cuml/manifold/CMakeLists.txt
index 7daa7a7759..0ca860afe7 100644
--- a/python/cuml/cuml/manifold/CMakeLists.txt
+++ b/python/cuml/cuml/manifold/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/metrics/CMakeLists.txt b/python/cuml/cuml/metrics/CMakeLists.txt
index 716558a32f..a56575ccb8 100644
--- a/python/cuml/cuml/metrics/CMakeLists.txt
+++ b/python/cuml/cuml/metrics/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/metrics/cluster/CMakeLists.txt b/python/cuml/cuml/metrics/cluster/CMakeLists.txt
index 02e677e76a..fbbde707f4 100644
--- a/python/cuml/cuml/metrics/cluster/CMakeLists.txt
+++ b/python/cuml/cuml/metrics/cluster/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/neighbors/CMakeLists.txt b/python/cuml/cuml/neighbors/CMakeLists.txt
index 0a8be580d5..6658ddc5f2 100644
--- a/python/cuml/cuml/neighbors/CMakeLists.txt
+++ b/python/cuml/cuml/neighbors/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/random_projection/CMakeLists.txt b/python/cuml/cuml/random_projection/CMakeLists.txt
index e607e64973..012382a1bb 100644
--- a/python/cuml/cuml/random_projection/CMakeLists.txt
+++ b/python/cuml/cuml/random_projection/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/solvers/CMakeLists.txt b/python/cuml/cuml/solvers/CMakeLists.txt
index d66521c49d..ad83508254 100644
--- a/python/cuml/cuml/solvers/CMakeLists.txt
+++ b/python/cuml/cuml/solvers/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/svm/CMakeLists.txt b/python/cuml/cuml/svm/CMakeLists.txt
index 6b1d13a176..5d19df1a4a 100644
--- a/python/cuml/cuml/svm/CMakeLists.txt
+++ b/python/cuml/cuml/svm/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/cuml/tsa/CMakeLists.txt b/python/cuml/cuml/tsa/CMakeLists.txt
index dca351710c..92552e9ff4 100644
--- a/python/cuml/cuml/tsa/CMakeLists.txt
+++ b/python/cuml/cuml/tsa/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml
index a6d7c6c618..be4f2b269e 100644
--- a/python/cuml/pyproject.toml
+++ b/python/cuml/pyproject.toml
@@ -183,6 +183,7 @@ requires = [
     "cuda-python",
     "cuvs==25.2.*,>=0.0.0a0",
     "cython>=3.0.0",
+    "libraft=25.2.*,>=0.0.0a0",
     "librmm==25.2.*,>=0.0.0a0",
     "ninja",
     "pylibraft==25.2.*,>=0.0.0a0",
diff --git a/python/libcuml/CMakeLists.txt b/python/libcuml/CMakeLists.txt
index a6a96b1ecd..467775fdd7 100644
--- a/python/libcuml/CMakeLists.txt
+++ b/python/libcuml/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -51,14 +51,11 @@ endif()
 
 unset(cuml_FOUND)
 
-find_package(CUDAToolkit REQUIRED)
-
 set(BUILD_CUML_TESTS OFF)
 set(BUILD_PRIMS_TESTS OFF)
 set(BUILD_CUML_C_LIBRARY OFF)
 set(BUILD_CUML_EXAMPLES OFF)
 set(BUILD_CUML_BENCH OFF)
-set(CUML_RAFT_CLONE_ON_PIN OFF)
 
 # --- treelite --- #
 set(CUML_EXPORT_TREELITE_LINKAGE ON)
@@ -69,17 +66,32 @@ set(CUML_USE_TREELITE_STATIC ON)
 # TODO(jameslamb): should treelite just be included here?
 set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
 
-# Statically link dependencies if building wheels
-set(CUDA_STATIC_RUNTIME ON)
-set(CUML_USE_CUVS_STATIC ON)
-set(CUML_USE_CUMLPRIMS_MG_STATIC ON)
-
-# Don't install the static libs into wheels
+# --- raft --- #
+set(CUML_RAFT_CLONE_ON_PIN OFF)
 set(CUML_EXCLUDE_RAFT_FROM_ALL ON)
+
+# --- cumlprims_mg --- #
+set(CUML_USE_CUMLPRIMS_MG_STATIC ON)
 # TODO(jameslamb): figure out how cumlprims_mg should work (re-download in cuml? only install/export the headers in libcuml?)
-set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL OFF)
+set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL ON)
+
+# --- cuvs --- #
+ 
+if(USE_CUVS_WHEEL)
+  set(CUML_USE_CUVS_STATIC OFF)
+else()
+  set(CUML_USE_CUVS_STATIC ON)
+endif()
+
+# either way, don't include any cuvs stuff in wheel
+# (expect anything building against libcuml to provide cuvs headers externally)
+set(CUML_EXCLUDE_CUVS_FROM_ALL ON)
+
+# --- CUDA --- #
+set(CUDA_STATIC_RUNTIME ON)
 
 # Link to the CUDA wheels with shared libraries for CUDA 12+
+find_package(CUDAToolkit REQUIRED)
 if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0)
   set(CUDA_STATIC_MATH_LIBRARIES OFF)
 else()
diff --git a/python/libcuml/libcuml/__init__.py b/python/libcuml/libcuml/__init__.py
index d86d2f1066..73d050e883 100644
--- a/python/libcuml/libcuml/__init__.py
+++ b/python/libcuml/libcuml/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/libcuml/libcuml/_version.py b/python/libcuml/libcuml/_version.py
index b347d590c5..09fcb13f3d 100644
--- a/python/libcuml/libcuml/_version.py
+++ b/python/libcuml/libcuml/_version.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/libcuml/libcuml/load.py b/python/libcuml/libcuml/load.py
index 5e72d7ec6f..e01e91c87a 100644
--- a/python/libcuml/libcuml/load.py
+++ b/python/libcuml/libcuml/load.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From 9bda0488c07167ef40a42e803a09b10a8d7f824b Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Thu, 2 Jan 2025 11:50:30 -0800
Subject: [PATCH 07/48] more changes

---
 build.sh                                      |  2 +-
 ci/build_wheel.sh                             |  2 +-
 ci/build_wheel_cuml.sh                        |  2 +-
 ci/build_wheel_libcuml.sh                     |  2 +-
 ci/test_wheel.sh                              |  2 +-
 ci/validate_wheel.sh                          |  2 +-
 .../all_cuda-118_arch-x86_64.yaml             |  1 +
 .../all_cuda-125_arch-x86_64.yaml             |  1 +
 cpp/CMakeLists.txt                            |  5 ++--
 cpp/cmake/thirdparty/get_cuvs.cmake           | 10 +++----
 dependencies.yaml                             |  4 ++-
 python/cuml/CMakeLists.txt                    | 27 +++++++++----------
 python/cuml/pyproject.toml                    |  1 +
 13 files changed, 33 insertions(+), 28 deletions(-)

diff --git a/build.sh b/build.sh
index 03c44c033f..6f1b6b9f83 100755
--- a/build.sh
+++ b/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 
 # cuml build script
 
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 78b8a8a08c..3c840d9849 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index 1969aa9315..9ed720fbd3 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
diff --git a/ci/build_wheel_libcuml.sh b/ci/build_wheel_libcuml.sh
index e563aed285..9f09c449d7 100755
--- a/ci/build_wheel_libcuml.sh
+++ b/ci/build_wheel_libcuml.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh
index acecf55024..8027876005 100755
--- a/ci/test_wheel.sh
+++ b/ci/test_wheel.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh
index 1bd68ab7a1..39beda002c 100755
--- a/ci/validate_wheel.sh
+++ b/ci/validate_wheel.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index bf73b938a5..6eb49b7dfd 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -43,6 +43,7 @@ dependencies:
 - libcusparse=11.7.5.86
 - libcuvs==25.2.*,>=0.0.0a0
 - libraft-headers==25.2.*,>=0.0.0a0
+- libraft=25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - nbsphinx
 - ninja
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index 72539f2d18..ae531dd438 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -40,6 +40,7 @@ dependencies:
 - libcusparse-dev
 - libcuvs==25.2.*,>=0.0.0a0
 - libraft-headers==25.2.*,>=0.0.0a0
+- libraft=25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - nbsphinx
 - ninja
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 0e9dfa04d0..baba572f16 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -602,6 +602,7 @@ if(BUILD_CUML_CPP_LIBRARY)
   # These are always private:
   list(APPEND _cuml_cpp_private_libs
     raft::raft
+    rmm::rmm_logger
     rmm::rmm_logger_impl
     raft::raft_logger_impl
     $<TARGET_NAME_IF_EXISTS:GPUTreeShap::GPUTreeShap>
@@ -628,7 +629,7 @@ if(BUILD_CUML_CPP_LIBRARY)
   )
 
   target_link_libraries(${CUML_CPP_TARGET}
-    PUBLIC  rmm::rmm rmm::rmm_logger ${CUVS_LIB}
+    PUBLIC  rmm::rmm ${CUVS_LIB}
             ${_cuml_cpp_public_libs}
     PRIVATE ${_cuml_cpp_private_libs}
   )
diff --git a/cpp/cmake/thirdparty/get_cuvs.cmake b/cpp/cmake/thirdparty/get_cuvs.cmake
index a46879c3e7..03d4077923 100644
--- a/cpp/cmake/thirdparty/get_cuvs.cmake
+++ b/cpp/cmake/thirdparty/get_cuvs.cmake
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
 # limitations under the License.
 #=============================================================================
 
-set(CUML_MIN_VERSION_cuvs "${CUML_VERSION_MAJOR}.${CUML_VERSION_MINOR}.00")
-set(CUML_BRANCH_VERSION_cuvs "${CUML_VERSION_MAJOR}.${CUML_VERSION_MINOR}")
+set(CUML_MIN_VERSION_cuvs "${RAPIDS_VERSION_MAJOR}.${RAPIDS_VERSION_MINOR}.00")
+set(CUML_BRANCH_VERSION_cuvs "${RAPIDS_VERSION_MAJOR}.${RAPIDS_VERSION_MINOR}")
 
 function(find_and_configure_cuvs)
     set(oneValueArgs VERSION FORK PINNED_TAG EXCLUDE_FROM_ALL USE_CUVS_STATIC COMPILE_LIBRARY CLONE_ON_PIN)
@@ -54,9 +54,9 @@ function(find_and_configure_cuvs)
         EXCLUDE_FROM_ALL       ${PKG_EXCLUDE_FROM_ALL}
         OPTIONS
           "BUILD_TESTS OFF"
-          "BUILD_BENCH OFF"
+          "BUILD_CAGRA_HNSWLIB OFF"
+          "BUILD_CUVS_BENCH OFF"
           "BUILD_MG_ALGOS ${CUVS_BUILD_MG_ALGOS}"
-
     )
 
     if(cuvs_ADDED)
diff --git a/dependencies.yaml b/dependencies.yaml
index 93a8640f63..391b99dc08 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -15,6 +15,7 @@ files:
       - depends_on_dask_cudf
       - depends_on_libcumlprims
       - depends_on_libcuvs
+      - depends_on_libraft
       - depends_on_libraft_headers
       - depends_on_librmm
       - depends_on_pylibraft
@@ -110,8 +111,9 @@ files:
     includes:
       - common_build
       - depends_on_cuvs
+      - depends_on_libcuml
       - depends_on_libcumlprims
-      - depends_on_libraft_headers
+      - depends_on_libraft
       - depends_on_librmm
       - depends_on_pylibraft
       - depends_on_rmm
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index d0a782b9c0..55ee1ccc2c 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -55,9 +55,9 @@ set(CUML_CPP_SRC "../../cpp")
 
 # --- CCCL, RAFT, and RMM --- #
 # CCCL before RMM, and RMM before RAFT
-include(cmake/thirdparty/get_cccl.cmake)
-include(cmake/thirdparty/get_rmm.cmake)
-include(cmake/thirdparty/get_raft.cmake)
+# include(cmake/thirdparty/get_cccl.cmake)
+# include(cmake/thirdparty/get_rmm.cmake)
+# include(cmake/thirdparty/get_raft.cmake)
 
 # --- treelite --- #
 # We need to call get_treelite explicitly because we need the correct
@@ -79,19 +79,18 @@ include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
 
 # get nlohmann_json and rapidsjson
 # TODO(jameslamb): maybe this isn't necessary?
-function(copy_interface_excludes)
-  set(_options "")
-  set(_one_value TARGET INCLUDED_TARGET)
-  set(_multi_value "")
-  cmake_parse_arguments(_CUML_INCLUDES "${_options}" "${_one_value}"
-                        "${_multi_value}" ${ARGN})
-  get_target_property(_includes ${_CUML_INCLUDES_INCLUDED_TARGET} INTERFACE_INCLUDE_DIRECTORIES)
-  target_include_directories(${_CUML_INCLUDES_TARGET} PUBLIC ${_includes})
-endfunction()
-copy_interface_excludes(INCLUDED_TARGET treelite::treelite_static TARGET ${CUML_CPP_TARGET})
+# function(copy_interface_excludes)
+#   set(_options "")
+#   set(_one_value TARGET INCLUDED_TARGET)
+#   set(_multi_value "")
+#   cmake_parse_arguments(_CUML_INCLUDES "${_options}" "${_one_value}"
+#                         "${_multi_value}" ${ARGN})
+#   get_target_property(_includes ${_CUML_INCLUDES_INCLUDED_TARGET} INTERFACE_INCLUDE_DIRECTORIES)
+#   target_include_directories(${_CUML_INCLUDES_TARGET} PUBLIC ${_includes})
+# endfunction()
+# copy_interface_excludes(INCLUDED_TARGET treelite::treelite_static TARGET ${CUML_CPP_TARGET})
 
 # --- cuvs --- #
- 
 if(USE_CUVS_WHEEL)
   set(CUML_USE_CUVS_STATIC OFF)
 else()
diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml
index be4f2b269e..b7a6e2c424 100644
--- a/python/cuml/pyproject.toml
+++ b/python/cuml/pyproject.toml
@@ -183,6 +183,7 @@ requires = [
     "cuda-python",
     "cuvs==25.2.*,>=0.0.0a0",
     "cython>=3.0.0",
+    "libcuml==25.2.*,>=0.0.0a0",
     "libraft=25.2.*,>=0.0.0a0",
     "librmm==25.2.*,>=0.0.0a0",
     "ninja",

From eb14ede0f22a3e6855472357f81729625bb25dd4 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 6 Jan 2025 10:13:23 -0800
Subject: [PATCH 08/48] more changes

---
 cpp/cmake/thirdparty/get_cccl.cmake         |  6 +++---
 cpp/cmake/thirdparty/get_cumlprims_mg.cmake |  9 ++++-----
 cpp/cmake/thirdparty/get_cuvs.cmake         |  3 +--
 cpp/cmake/thirdparty/get_raft.cmake         |  9 ++++-----
 cpp/cmake/thirdparty/get_rmm.cmake          |  6 +++---
 python/cuml/CMakeLists.txt                  | 12 ++++++++++--
 6 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/cpp/cmake/thirdparty/get_cccl.cmake b/cpp/cmake/thirdparty/get_cccl.cmake
index 0c126e320e..991b4e4edd 100644
--- a/cpp/cmake/thirdparty/get_cccl.cmake
+++ b/cpp/cmake/thirdparty/get_cccl.cmake
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -15,8 +15,8 @@
 # Use CPM to find or clone CCCL
 function(find_and_configure_cccl)
         include(${rapids-cmake-dir}/cpm/cccl.cmake)
-        rapids_cpm_cccl(BUILD_EXPORT_SET cuml-exports
-                        INSTALL_EXPORT_SET cuml-exports)
+        # TODO(jameslamb): justify not exporting cccl?
+        rapids_cpm_cccl()
 endfunction()
 
 find_and_configure_cccl()
diff --git a/cpp/cmake/thirdparty/get_cumlprims_mg.cmake b/cpp/cmake/thirdparty/get_cumlprims_mg.cmake
index 0bbc91fc17..0799f6fe5f 100644
--- a/cpp/cmake/thirdparty/get_cumlprims_mg.cmake
+++ b/cpp/cmake/thirdparty/get_cumlprims_mg.cmake
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
 # limitations under the License.
 #=============================================================================
 
-set(CUML_MIN_VERSION_cumlprims_mg "${CUML_VERSION_MAJOR}.${CUML_VERSION_MINOR}.00")
-set(CUML_BRANCH_VERSION_cumlprims_mg "${CUML_VERSION_MAJOR}.${CUML_VERSION_MINOR}")
+set(CUML_MIN_VERSION_cumlprims_mg "${RAPIDS_VERSION_MAJOR}.${RAPIDS_VERSION_MINOR}.00")
+set(CUML_BRANCH_VERSION_cumlprims_mg "${RAPIDS_VERSION_MAJOR}.${RAPIDS_VERSION_MINOR}")
 
 function(find_and_configure_cumlprims_mg)
 
@@ -35,10 +35,9 @@ function(find_and_configure_cumlprims_mg)
       set(CUMLPRIMS_MG_BUILD_SHARED_LIBS OFF)
     endif()
 
+    # TODO(jameslamb): justify not exporting cumlprims?
     rapids_cpm_find(cumlprims_mg ${PKG_VERSION}
       GLOBAL_TARGETS      cumlprims_mg::cumlprims_mg
-      BUILD_EXPORT_SET    cuml-exports
-      INSTALL_EXPORT_SET  cuml-exports
         CPM_ARGS
           GIT_REPOSITORY git@github.com:${PKG_FORK}/cumlprims_mg.git
           GIT_TAG        ${PKG_PINNED_TAG}
diff --git a/cpp/cmake/thirdparty/get_cuvs.cmake b/cpp/cmake/thirdparty/get_cuvs.cmake
index 03d4077923..03673ce6c8 100644
--- a/cpp/cmake/thirdparty/get_cuvs.cmake
+++ b/cpp/cmake/thirdparty/get_cuvs.cmake
@@ -43,10 +43,9 @@ function(find_and_configure_cuvs)
       set(CUVS_BUILD_MG_ALGOS OFF)
     endif()
 
+    # TODO(jameslamb): is not exporting cuvs ok here?
     rapids_cpm_find(cuvs ${PKG_VERSION}
       GLOBAL_TARGETS      cuvs::cuvs
-      BUILD_EXPORT_SET    cuml-exports
-      INSTALL_EXPORT_SET  cuml-exports
       CPM_ARGS
         GIT_REPOSITORY         https://github.com/${PKG_FORK}/cuvs.git
         GIT_TAG                ${PKG_PINNED_TAG}
diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake
index 3240c730c5..fa569927cd 100644
--- a/cpp/cmake/thirdparty/get_raft.cmake
+++ b/cpp/cmake/thirdparty/get_raft.cmake
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
 # limitations under the License.
 #=============================================================================
 
-set(CUML_MIN_VERSION_raft "${CUML_VERSION_MAJOR}.${CUML_VERSION_MINOR}.00")
-set(CUML_BRANCH_VERSION_raft "${CUML_VERSION_MAJOR}.${CUML_VERSION_MINOR}")
+set(CUML_MIN_VERSION_raft "${RAPIDS_VERSION_MAJOR}.${RAPIDS_VERSION_MINOR}.00")
+set(CUML_BRANCH_VERSION_raft "${RAPIDS_VERSION_MAJOR}.${RAPIDS_VERSION_MINOR}")
 
 function(find_and_configure_raft)
     set(oneValueArgs VERSION FORK PINNED_TAG EXCLUDE_FROM_ALL USE_RAFT_STATIC COMPILE_LIBRARY CLONE_ON_PIN NVTX)
@@ -42,10 +42,9 @@ function(find_and_configure_raft)
 
     message(VERBOSE "CUML: raft FIND_PACKAGE_ARGUMENTS COMPONENTS ${RAFT_COMPONENTS}")
 
+    # TODO(jameslamb): justify not exporting raft?
     rapids_cpm_find(raft ${PKG_VERSION}
       GLOBAL_TARGETS      raft::raft
-      BUILD_EXPORT_SET    cuml-exports
-      INSTALL_EXPORT_SET  cuml-exports
       COMPONENTS          ${RAFT_COMPONENTS}
       CPM_ARGS
         GIT_REPOSITORY         https://github.com/${PKG_FORK}/raft.git
diff --git a/cpp/cmake/thirdparty/get_rmm.cmake b/cpp/cmake/thirdparty/get_rmm.cmake
index 35968f7245..f30204beaa 100644
--- a/cpp/cmake/thirdparty/get_rmm.cmake
+++ b/cpp/cmake/thirdparty/get_rmm.cmake
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,8 +16,8 @@
 
 function(find_and_configure_rmm)
     include(${rapids-cmake-dir}/cpm/rmm.cmake)
-    rapids_cpm_rmm(BUILD_EXPORT_SET cuml-exports
-                   INSTALL_EXPORT_SET cuml-exports)
+    # TODO(jameslamb): justify not exporting rmm?
+    rapids_cpm_rmm()
 endfunction()
 
 find_and_configure_rmm()
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 55ee1ccc2c..8b10db329e 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -55,9 +55,17 @@ set(CUML_CPP_SRC "../../cpp")
 
 # --- CCCL, RAFT, and RMM --- #
 # CCCL before RMM, and RMM before RAFT
+# TODO(jameslamb): skipping CCCL because we're already getting it from libraft... is that ok?
+#   -- Found CCCL: /pyenv/versions/3.12.8/lib/python3.12/site-packages/libraft/lib64/rapids/cmake/cccl/cccl-config.cmake (found version "2.7.0.0")
+#     CMake Error at /pyenv/versions/3.12.8/lib/python3.12/site-packages/libraft/lib64/cmake/NvidiaCutlass/NvidiaCutlassTargets.cmake:42 (message):
+#     Some (but not all) targets in this export set were already defined.
+#
+#     Targets Defined: nvidia::cutlass::cutlass
+#     Targets not yet defined: nvidia::cutlass::tools::util
+#
 # include(cmake/thirdparty/get_cccl.cmake)
-# include(cmake/thirdparty/get_rmm.cmake)
-# include(cmake/thirdparty/get_raft.cmake)
+include(cmake/thirdparty/get_rmm.cmake)
+include(cmake/thirdparty/get_raft.cmake)
 
 # --- treelite --- #
 # We need to call get_treelite explicitly because we need the correct

From 8c1f38dd62a0e4431b3eaf5a8f1e114b10fb66a0 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 6 Jan 2025 12:38:53 -0800
Subject: [PATCH 09/48] got importing minimally working

---
 python/cuml/CMakeLists.txt    | 12 ++++++------
 python/libcuml/CMakeLists.txt | 16 +++++++++-------
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 8b10db329e..4f1a14a612 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -53,6 +53,10 @@ set(CUML_CPP_SRC "../../cpp")
 ################################################################################
 # - Process User Options  ------------------------------------------------------
 
+include(rapids-cpm)
+include(rapids-export)
+rapids_cpm_init()
+
 # --- CCCL, RAFT, and RMM --- #
 # CCCL before RMM, and RMM before RAFT
 # TODO(jameslamb): skipping CCCL because we're already getting it from libraft... is that ok?
@@ -64,8 +68,8 @@ set(CUML_CPP_SRC "../../cpp")
 #     Targets not yet defined: nvidia::cutlass::tools::util
 #
 # include(cmake/thirdparty/get_cccl.cmake)
-include(cmake/thirdparty/get_rmm.cmake)
-include(cmake/thirdparty/get_raft.cmake)
+include(${CUML_CPP_SRC}/cmake/thirdparty/get_rmm.cmake)
+include(${CUML_CPP_SRC}/cmake/thirdparty/get_raft.cmake)
 
 # --- treelite --- #
 # We need to call get_treelite explicitly because we need the correct
@@ -73,10 +77,6 @@ include(cmake/thirdparty/get_raft.cmake)
 #
 # and it needs to come before find_package(cuml), because it's a PUBLIC
 # dependency of cuml::cuml
-include(rapids-cpm)
-include(rapids-export)
-rapids_cpm_init()
-
 set(CUML_EXPORT_TREELITE_LINKAGE ON)
 set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
 # TODO(jameslamb): is it safe for libcuml and cuml to both use their own static treelite?
diff --git a/python/libcuml/CMakeLists.txt b/python/libcuml/CMakeLists.txt
index 467775fdd7..d2b0ae12bc 100644
--- a/python/libcuml/CMakeLists.txt
+++ b/python/libcuml/CMakeLists.txt
@@ -106,18 +106,20 @@ endif()
 add_subdirectory(../../cpp cuml-cpp)
 
 if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS)
+  # assumes libcuml++ is installed 2 levels deep, e.g. site-packages/libcuml/lib64/libcuml++.so
   set(rpaths
-    "$ORIGIN/../nvidia/cublas/lib"
-    "$ORIGIN/../nvidia/cufft/lib"
-    "$ORIGIN/../nvidia/curand/lib"
-    "$ORIGIN/../nvidia/cusolver/lib"
-    "$ORIGIN/../nvidia/cusparse/lib"
-    "$ORIGIN/../nvidia/nvjitlink/lib"
+    "$ORIGIN/../../nvidia/cublas/lib"
+    "$ORIGIN/../../nvidia/cufft/lib"
+    "$ORIGIN/../../nvidia/curand/lib"
+    "$ORIGIN/../../nvidia/cusolver/lib"
+    "$ORIGIN/../../nvidia/cusparse/lib"
+    "$ORIGIN/../../nvidia/nvjitlink/lib"
   )
   set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH ${rpaths} APPEND)
 endif()
 
 if(USE_CUVS_WHEEL)
-  set(rpaths "$ORIGIN/../cuvs")
+  # assumes libcuml++ is installed 2 levels deep, e.g. site-packages/libcuml/lib64/libcuml++.so
+  set(rpaths "$ORIGIN/../../cuvs")
   set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH ${rpaths} APPEND)
 endif()

From 7c095984bceef359b84cf123ea931b6e8ab108b6 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 6 Jan 2025 13:59:14 -0800
Subject: [PATCH 10/48] use conda packages from RAFT PR

---
 ci/build_cpp.sh                   |  3 +++
 ci/build_docs.sh                  |  3 +++
 ci/build_python.sh                |  3 +++
 ci/build_wheel_cuml.sh            |  3 +++
 ci/build_wheel_libcuml.sh         |  3 +++
 ci/test_cpp.sh                    |  3 +++
 ci/test_notebooks.sh              |  3 +++
 ci/test_python_common.sh          |  3 +++
 ci/test_wheel.sh                  |  1 -
 ci/use_conda_packages_from_prs.sh | 10 ++++++++++
 ci/use_wheels_from_prs.sh         | 18 ++++++++++++++++++
 11 files changed, 52 insertions(+), 1 deletion(-)
 create mode 100644 ci/use_conda_packages_from_prs.sh
 create mode 100644 ci/use_wheels_from_prs.sh

diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index fa066c03c6..cd85816356 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -9,6 +9,9 @@ source rapids-configure-sccache
 
 source rapids-date-string
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ci/use_conda_packages_from_prs.sh
+
 export CMAKE_GENERATOR=Ninja
 
 rapids-print-env
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index 05f1f24ee5..5fdbf50424 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -8,6 +8,9 @@ rapids-logger "Create test conda environment"
 RAPIDS_VERSION="$(rapids-version)"
 export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ci/use_conda_packages_from_prs.sh
+
 rapids-dependency-file-generator \
   --output conda \
   --file-key docs \
diff --git a/ci/build_python.sh b/ci/build_python.sh
index e8d2b3ed39..05a895fa82 100755
--- a/ci/build_python.sh
+++ b/ci/build_python.sh
@@ -9,6 +9,9 @@ source rapids-configure-sccache
 
 source rapids-date-string
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ci/use_conda_packages_from_prs.sh
+
 export CMAKE_GENERATOR=Ninja
 
 rapids-print-env
diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index 9ed720fbd3..8b3676cc9d 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -8,6 +8,9 @@ package_dir="python/cuml"
 
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ./ci/use_wheels_from_prs.sh
+
 # TODO(jameslamb): split this out into build_wheel_{cuml,libcuml}.sh
 # TODO(jameslamb): add libcuml++.so to cuml exclusions
 case "${RAPIDS_CUDA_VERSION}" in
diff --git a/ci/build_wheel_libcuml.sh b/ci/build_wheel_libcuml.sh
index 9f09c449d7..72c2b46072 100755
--- a/ci/build_wheel_libcuml.sh
+++ b/ci/build_wheel_libcuml.sh
@@ -8,6 +8,9 @@ package_dir="python/libcuml"
 
 rapids-logger "Generating build requirements"
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ./ci/use_wheels_from_prs.sh
+
 rapids-dependency-file-generator \
   --output requirements \
   --file-key "py_build_${package_name}" \
diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh
index ea6d1cdc11..b608e094f0 100755
--- a/ci/test_cpp.sh
+++ b/ci/test_cpp.sh
@@ -11,6 +11,9 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../
 rapids-logger "Downloading artifacts from previous jobs"
 CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ci/use_conda_packages_from_prs.sh
+
 rapids-logger "Generate C++ testing dependencies"
 rapids-dependency-file-generator \
   --output conda \
diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh
index d76a754b8b..91fd2e51d3 100755
--- a/ci/test_notebooks.sh
+++ b/ci/test_notebooks.sh
@@ -8,6 +8,9 @@ rapids-logger "Downloading artifacts from previous jobs"
 CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
 PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ci/use_conda_packages_from_prs.sh
+
 rapids-logger "Generate Notebook testing dependencies"
 rapids-dependency-file-generator \
   --output conda \
diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh
index 5f1894356c..1da32ddafe 100644
--- a/ci/test_python_common.sh
+++ b/ci/test_python_common.sh
@@ -9,6 +9,9 @@ rapids-logger "Downloading artifacts from previous jobs"
 CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
 PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ci/use_conda_packages_from_prs.sh
+
 rapids-logger "Generate Python testing dependencies"
 rapids-dependency-file-generator \
   --output conda \
diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh
index 8027876005..4924a17982 100755
--- a/ci/test_wheel.sh
+++ b/ci/test_wheel.sh
@@ -20,7 +20,6 @@ EXITCODE=0
 trap "EXITCODE=1" ERR
 set +e
 
-
 rapids-logger "pytest cuml single GPU"
 ./ci/run_cuml_singlegpu_pytests.sh \
   --numprocesses=8 \
diff --git a/ci/use_conda_packages_from_prs.sh b/ci/use_conda_packages_from_prs.sh
new file mode 100644
index 0000000000..4708dc8587
--- /dev/null
+++ b/ci/use_conda_packages_from_prs.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+RAFT_COMMIT="345f0e556b602ec65b5eebe825ffd000d61706fe"
+
+RAFT_CPP_CHANNEL=$(rapids-get-pr-conda-artifact raft 2531 cpp "${RAFT_COMMIT:0:7}")
+RAFT_PYTHON_CHANNEL=$(rapids-get-pr-conda-artifact raft 2531 python "${RAFT_COMMIT:0:7}")
+
+conda config --system --add channels "${RAFT_CPP_CHANNEL}"
+conda config --system --add channels "${RAFT_PYTHON_CHANNEL}"
diff --git a/ci/use_wheels_from_prs.sh b/ci/use_wheels_from_prs.sh
new file mode 100644
index 0000000000..aa9d83a03f
--- /dev/null
+++ b/ci/use_wheels_from_prs.sh
@@ -0,0 +1,18 @@
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+RAFT_COMMIT="345f0e556b602ec65b5eebe825ffd000d61706fe"
+LIBRAFT_CHANNEL=$(
+  RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact raft 2531 cpp "${RAFT_COMMIT:0:7}"
+)
+PYLIBRAFT_CHANNEL=$(
+  RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact raft 2531 python "${RAFT_COMMIT:0:7}"
+)
+RAFT_DASK_CHANNEL=$(
+  RAPIDS_PY_WHEEL_NAME="raft_dask_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact raft 2531 python "${RAFT_COMMIT:0:7}"
+)
+cat > ./constraints.txt <<EOF
+libraft-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${LIBRAFT_CHANNEL}/libraft_*.whl)
+pylibraft-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${PYLIBRAFT_CHANNEL}/pylibraft_*.whl)
+raft-dask-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${RAFT_DASK_CHANNEL}/raft_dask_*.whl)
+EOF
+
+export PIP_CONSTRAINT=$(pwd)/constraints.txt

From c7ad4509a0205bd824bf9a6e135e905ddb0a78dd Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 6 Jan 2025 14:12:33 -0800
Subject: [PATCH 11/48] pre-commit

---
 ci/build_cpp.sh          | 2 +-
 ci/build_docs.sh         | 2 +-
 ci/build_python.sh       | 2 +-
 ci/test_cpp.sh           | 2 +-
 ci/test_notebooks.sh     | 2 +-
 ci/test_python_common.sh | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index cd85816356..7c35f35be5 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index 5fdbf50424..a1cc16eb10 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 set -euo pipefail
 
 rapids-logger "Create test conda environment"
diff --git a/ci/build_python.sh b/ci/build_python.sh
index 05a895fa82..d0b5724f89 100755
--- a/ci/build_python.sh
+++ b/ci/build_python.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh
index b608e094f0..9535d43ae7 100755
--- a/ci/test_cpp.sh
+++ b/ci/test_cpp.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh
index 91fd2e51d3..c3d8ec7453 100755
--- a/ci/test_notebooks.sh
+++ b/ci/test_notebooks.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 set -euo pipefail
 
 . /opt/conda/etc/profile.d/conda.sh
diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh
index 1da32ddafe..0430c553c3 100644
--- a/ci/test_python_common.sh
+++ b/ci/test_python_common.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 

From 8d058e6908dff64a3b28e01464293fe4618091a1 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 6 Jan 2025 14:37:32 -0800
Subject: [PATCH 12/48] copyright

---
 ci/use_conda_packages_from_prs.sh | 2 +-
 ci/use_wheels_from_prs.sh         | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/ci/use_conda_packages_from_prs.sh b/ci/use_conda_packages_from_prs.sh
index 4708dc8587..0dc498e129 100644
--- a/ci/use_conda_packages_from_prs.sh
+++ b/ci/use_conda_packages_from_prs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+# Copyright (c) 2025, NVIDIA CORPORATION.
 
 # TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
 RAFT_COMMIT="345f0e556b602ec65b5eebe825ffd000d61706fe"
diff --git a/ci/use_wheels_from_prs.sh b/ci/use_wheels_from_prs.sh
index aa9d83a03f..4eefb31f9c 100644
--- a/ci/use_wheels_from_prs.sh
+++ b/ci/use_wheels_from_prs.sh
@@ -1,3 +1,5 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
 # TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
 RAFT_COMMIT="345f0e556b602ec65b5eebe825ffd000d61706fe"
 LIBRAFT_CHANNEL=$(

From cbf26a297e1e119235123dfe620eeb75cd32fa29 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 6 Jan 2025 14:53:04 -0800
Subject: [PATCH 13/48] fix scripts

---
 ci/use_conda_packages_from_prs.sh | 2 +-
 ci/use_wheels_from_prs.sh         | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/ci/use_conda_packages_from_prs.sh b/ci/use_conda_packages_from_prs.sh
index 0dc498e129..90293b71fa 100644
--- a/ci/use_conda_packages_from_prs.sh
+++ b/ci/use_conda_packages_from_prs.sh
@@ -1,6 +1,6 @@
 # Copyright (c) 2025, NVIDIA CORPORATION.
+# TODO(jameslamb): remove this file when https://github.com/rapidsai/raft/pull/2531 is merged
 
-# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
 RAFT_COMMIT="345f0e556b602ec65b5eebe825ffd000d61706fe"
 
 RAFT_CPP_CHANNEL=$(rapids-get-pr-conda-artifact raft 2531 cpp "${RAFT_COMMIT:0:7}")
diff --git a/ci/use_wheels_from_prs.sh b/ci/use_wheels_from_prs.sh
index 4eefb31f9c..565111dc6a 100644
--- a/ci/use_wheels_from_prs.sh
+++ b/ci/use_wheels_from_prs.sh
@@ -1,6 +1,8 @@
 # Copyright (c) 2025, NVIDIA CORPORATION.
+# TODO(jameslamb): remove this file when https://github.com/rapidsai/raft/pull/2531 is merged
+
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
-# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
 RAFT_COMMIT="345f0e556b602ec65b5eebe825ffd000d61706fe"
 LIBRAFT_CHANNEL=$(
   RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact raft 2531 cpp "${RAFT_COMMIT:0:7}"

From dc8a5a4e0792eee2c9f626674339debb591620d4 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 6 Jan 2025 18:25:35 -0800
Subject: [PATCH 14/48] fix wheel downloads

---
 .github/workflows/pr.yaml | 26 +++++++++++++-------------
 ci/use_wheels_from_prs.sh |  2 +-
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index abc54e424a..8e742c2076 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -28,7 +28,7 @@ jobs:
       - wheel-build-libcuml
       - wheel-build-cuml
       - wheel-tests-cuml
-      - devcontainer
+      # - devcontainer
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02
     if: always()
@@ -197,18 +197,18 @@ jobs:
     with:
       build_type: pull-request
       script: ci/test_wheel.sh
-  devcontainer:
-    needs: telemetry-setup
-    secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02
-    with:
-      arch: '["amd64"]'
-      cuda: '["12.5"]'
-      extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY
-      build_command: |
-        sccache -z;
-        build-all --verbose;
-        sccache -s;
+  # devcontainer:
+  #   needs: telemetry-setup
+  #   secrets: inherit
+  #   uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02
+  #   with:
+  #     arch: '["amd64"]'
+  #     cuda: '["12.5"]'
+  #     extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY
+  #     build_command: |
+  #       sccache -z;
+  #       build-all --verbose;
+  #       sccache -s;
 
   telemetry-summarize:
     # This job must use a self-hosted runner to record telemetry traces.
diff --git a/ci/use_wheels_from_prs.sh b/ci/use_wheels_from_prs.sh
index 565111dc6a..66b002dd4f 100644
--- a/ci/use_wheels_from_prs.sh
+++ b/ci/use_wheels_from_prs.sh
@@ -8,7 +8,7 @@ LIBRAFT_CHANNEL=$(
   RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact raft 2531 cpp "${RAFT_COMMIT:0:7}"
 )
 PYLIBRAFT_CHANNEL=$(
-  RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact raft 2531 python "${RAFT_COMMIT:0:7}"
+  RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact raft 2531 python "${RAFT_COMMIT:0:7}"
 )
 RAFT_DASK_CHANNEL=$(
   RAPIDS_PY_WHEEL_NAME="raft_dask_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact raft 2531 python "${RAFT_COMMIT:0:7}"

From 9773c81683a5a8b5e5ba612a72929cce613e01fd Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 6 Jan 2025 18:35:33 -0800
Subject: [PATCH 15/48] clone cumlprims_mg in CI

---
 .github/workflows/pr.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 8e742c2076..1526081d67 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -177,6 +177,9 @@ jobs:
       sha: ${{ inputs.sha }}
       date: ${{ inputs.date }}
       script: ci/build_wheel_libcuml.sh
+      extra-repo: rapidsai/cumlprims_mg
+      extra-repo-sha: branch-25.02
+      extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY
       # build for every combination of arch and CUDA version, but only for the latest Python
       matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
   wheel-build-cuml:

From a56e3d4f0567bee7a7ec448ced920c35a4998ceb Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Tue, 7 Jan 2025 14:13:26 -0800
Subject: [PATCH 16/48] fix pins, fix script

---
 ci/build_wheel_cuml.sh                           | 2 +-
 ci/build_wheel_libcuml.sh                        | 2 +-
 ci/test_wheel.sh                                 | 1 +
 conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +-
 conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +-
 dependencies.yaml                                | 4 +++-
 python/cuml/pyproject.toml                       | 2 +-
 python/libcuml/pyproject.toml                    | 4 +++-
 8 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index 8b3676cc9d..518b97ce9a 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -44,6 +44,6 @@ python -m auditwheel repair \
     -w ${package_dir}/final_dist \
     ${package_dir}/dist/*
 
-./ci/validate_wheel.sh ${package_dir} final_dist
+./ci/validate_wheel.sh ${package_dir}/final_dist
 
 RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python "${package_dir}/final_dist"
diff --git a/ci/build_wheel_libcuml.sh b/ci/build_wheel_libcuml.sh
index 72c2b46072..0abdfcfeae 100755
--- a/ci/build_wheel_libcuml.sh
+++ b/ci/build_wheel_libcuml.sh
@@ -61,6 +61,6 @@ python -m auditwheel repair \
     -w ${package_dir}/final_dist \
     ${package_dir}/dist/*
 
-./ci/validate_wheel.sh ${package_dir} final_dist
+./ci/validate_wheel.sh ${package_dir}/final_dist
 
 RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist"
diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh
index 4924a17982..8027876005 100755
--- a/ci/test_wheel.sh
+++ b/ci/test_wheel.sh
@@ -20,6 +20,7 @@ EXITCODE=0
 trap "EXITCODE=1" ERR
 set +e
 
+
 rapids-logger "pytest cuml single GPU"
 ./ci/run_cuml_singlegpu_pytests.sh \
   --numprocesses=8 \
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 6eb49b7dfd..05cfd3d7ae 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -43,7 +43,7 @@ dependencies:
 - libcusparse=11.7.5.86
 - libcuvs==25.2.*,>=0.0.0a0
 - libraft-headers==25.2.*,>=0.0.0a0
-- libraft=25.2.*,>=0.0.0a0
+- libraft==25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - nbsphinx
 - ninja
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index ae531dd438..a2daa61350 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -40,7 +40,7 @@ dependencies:
 - libcusparse-dev
 - libcuvs==25.2.*,>=0.0.0a0
 - libraft-headers==25.2.*,>=0.0.0a0
-- libraft=25.2.*,>=0.0.0a0
+- libraft==25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - nbsphinx
 - ninja
diff --git a/dependencies.yaml b/dependencies.yaml
index 391b99dc08..c4c10d48bd 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -170,6 +170,8 @@ files:
       table: project
     includes:
       - cuda_wheels
+      - depends_on_cuvs
+      - depends_on_treelite
 channels:
   - rapidsai
   - rapidsai-nightly
@@ -636,7 +638,7 @@ dependencies:
     common:
       - output_types: conda
         packages:
-          - &libraft_unsuffixed libraft=25.2.*,>=0.0.0a0
+          - &libraft_unsuffixed libraft==25.2.*,>=0.0.0a0
       - output_types: requirements
         packages:
           # pip recognizes the index as a global option for the requirements.txt file
diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml
index b7a6e2c424..4b2c85c724 100644
--- a/python/cuml/pyproject.toml
+++ b/python/cuml/pyproject.toml
@@ -184,7 +184,7 @@ requires = [
     "cuvs==25.2.*,>=0.0.0a0",
     "cython>=3.0.0",
     "libcuml==25.2.*,>=0.0.0a0",
-    "libraft=25.2.*,>=0.0.0a0",
+    "libraft==25.2.*,>=0.0.0a0",
     "librmm==25.2.*,>=0.0.0a0",
     "ninja",
     "pylibraft==25.2.*,>=0.0.0a0",
diff --git a/python/libcuml/pyproject.toml b/python/libcuml/pyproject.toml
index c2b7549c49..6085278957 100644
--- a/python/libcuml/pyproject.toml
+++ b/python/libcuml/pyproject.toml
@@ -37,11 +37,13 @@ classifiers = [
     "Environment :: GPU :: NVIDIA CUDA",
 ]
 dependencies = [
+    "cuvs==25.2.*,>=0.0.0a0",
     "nvidia-cublas",
     "nvidia-cufft",
     "nvidia-curand",
     "nvidia-cusolver",
     "nvidia-cusparse",
+    "treelite==4.3.0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [project.urls]
@@ -80,7 +82,7 @@ requires = [
     "cmake>=3.26.4,!=3.30.0",
     "cuda-python",
     "cython>=3.0.0",
-    "libraft=25.2.*,>=0.0.0a0",
+    "libraft==25.2.*,>=0.0.0a0",
     "librmm==25.2.*,>=0.0.0a0",
     "ninja",
     "treelite==4.3.0",

From dba121f63cd07a3da42d5f133ae4e5e387117047 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Wed, 8 Jan 2025 08:55:23 -0800
Subject: [PATCH 17/48] pull in changes from logger targets

---
 cpp/cmake/thirdparty/get_raft.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake
index fa569927cd..b9468ab834 100644
--- a/cpp/cmake/thirdparty/get_raft.cmake
+++ b/cpp/cmake/thirdparty/get_raft.cmake
@@ -44,7 +44,7 @@ function(find_and_configure_raft)
 
     # TODO(jameslamb): justify not exporting raft?
     rapids_cpm_find(raft ${PKG_VERSION}
-      GLOBAL_TARGETS      raft::raft
+      GLOBAL_TARGETS      raft::raft raft::raft_logger raft::raft_logger_impl
       COMPONENTS          ${RAFT_COMPONENTS}
       CPM_ARGS
         GIT_REPOSITORY         https://github.com/${PKG_FORK}/raft.git

From 2290fb76324d340eca5159302f79b86628ccc55b Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Wed, 8 Jan 2025 20:44:08 -0800
Subject: [PATCH 18/48] more changes

---
 ci/build_wheel_cuml.sh         | 12 ++++++------
 ci/build_wheel_libcuml.sh      | 11 ++++++-----
 cpp/CMakeLists.txt             |  4 ++--
 dependencies.yaml              |  1 +
 python/libcuml/libcuml/load.py | 16 ++++++++++++++++
 python/libcuml/pyproject.toml  |  1 +
 6 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index 518b97ce9a..1ae21901ef 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -11,11 +11,14 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 # TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
 source ./ci/use_wheels_from_prs.sh
 
-# TODO(jameslamb): split this out into build_wheel_{cuml,libcuml}.sh
-# TODO(jameslamb): add libcuml++.so to cuml exclusions
+EXCLUDE_ARGS=(
+  --exclude "libcuml++.so"
+  --exclude "libcuvs.so"
+)
+
 case "${RAPIDS_CUDA_VERSION}" in
   12.*)
-    EXCLUDE_ARGS=(
+    EXCLUDE_ARGS+=(
       --exclude "libcuvs.so"
       --exclude "libcublas.so.12"
       --exclude "libcublasLt.so.12"
@@ -28,9 +31,6 @@ case "${RAPIDS_CUDA_VERSION}" in
     EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON"
     ;;
   11.*)
-    EXCLUDE_ARGS=(
-      --exclude "libcuvs.so"
-    )
     EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF"
     ;;
 esac
diff --git a/ci/build_wheel_libcuml.sh b/ci/build_wheel_libcuml.sh
index 0abdfcfeae..6fa5a77f60 100755
--- a/ci/build_wheel_libcuml.sh
+++ b/ci/build_wheel_libcuml.sh
@@ -28,10 +28,14 @@ python -m pip install \
 # 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735)
 export PIP_NO_BUILD_ISOLATION=0
 
+EXCLUDE_ARGS=(
+  --exclude "libcuvs.so"
+  --exclude "libraft.so"
+)
+
 case "${RAPIDS_CUDA_VERSION}" in
   12.*)
-    EXCLUDE_ARGS=(
-      --exclude "libcuvs.so"
+    EXCLUDE_ARGS+=(
       --exclude "libcublas.so.12"
       --exclude "libcublasLt.so.12"
       --exclude "libcufft.so.11"
@@ -43,9 +47,6 @@ case "${RAPIDS_CUDA_VERSION}" in
     EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON"
     ;;
   11.*)
-    EXCLUDE_ARGS=(
-      --exclude "libcuvs.so"
-    )
     EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF"
     ;;
 esac
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index b082034238..13adb87385 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -65,6 +65,7 @@ option(USE_CCACHE "Cache build artifacts with ccache" OFF)
 option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
 option(CUDA_STATIC_MATH_LIBRARIES "Statically link the CUDA math libraries" OFF)
 option(CUML_USE_CUVS_STATIC "Build and statically link the CUVS library" OFF)
+option(CUML_USE_RAFT_STATIC "Build and statically link the RAFT library" OFF)
 option(CUML_USE_TREELITE_STATIC "Build and statically link the treelite library" OFF)
 option(CUML_EXPORT_TREELITE_LINKAGE "Whether to publicly or privately link treelite to libcuml++" OFF)
 option(CUML_USE_CUMLPRIMS_MG_STATIC "Build and statically link the cumlprims_mg library" OFF)
@@ -619,7 +620,6 @@ if(BUILD_CUML_CPP_LIBRARY)
   # These are always private:
   list(APPEND _cuml_cpp_private_libs
     raft::raft
-    rmm::rmm_logger
     rmm::rmm_logger_impl
     raft::raft_logger_impl
     cuml_logger_impl
@@ -647,7 +647,7 @@ if(BUILD_CUML_CPP_LIBRARY)
   )
 
   target_link_libraries(${CUML_CPP_TARGET}
-    PUBLIC  rmm::rmm ${CUVS_LIB}
+    PUBLIC  rmm::rmm rmm::rmm_logger ${CUVS_LIB}
             ${_cuml_cpp_public_libs}
             cuml_logger
     PRIVATE ${_cuml_cpp_private_libs}
diff --git a/dependencies.yaml b/dependencies.yaml
index c4c10d48bd..65879e0b34 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -171,6 +171,7 @@ files:
     includes:
       - cuda_wheels
       - depends_on_cuvs
+      - depends_on_libraft
       - depends_on_treelite
 channels:
   - rapidsai
diff --git a/python/libcuml/libcuml/load.py b/python/libcuml/libcuml/load.py
index e01e91c87a..3dbee92bfb 100644
--- a/python/libcuml/libcuml/load.py
+++ b/python/libcuml/libcuml/load.py
@@ -46,6 +46,22 @@ def _load_wheel_installation(soname: str):
 
 def load_library():
     """Dynamically load libcuml++.so and its dependencies"""
+    try:
+        # libraft must be loaded before libcuml++ because libcuml++
+        # references its symbols
+        import libraft
+
+        libraft.load_library()
+    except ModuleNotFoundError:
+        # 'libcuml++' has a runtime dependency on 'libraft'. However,
+        # that dependency might be satisfied by the 'libraft' conda package
+        # (which does not have any Python modules), instead of the
+        # 'libraft' wheel.
+        #
+        # In that situation, assume that 'libraft.so' is in a place where
+        # the loader can find it.
+        pass
+
     # treelite must be loaded before libcuml++ because libcuml++
     # references its symbols
     import treelite
diff --git a/python/libcuml/pyproject.toml b/python/libcuml/pyproject.toml
index 6085278957..2619fa46a4 100644
--- a/python/libcuml/pyproject.toml
+++ b/python/libcuml/pyproject.toml
@@ -38,6 +38,7 @@ classifiers = [
 ]
 dependencies = [
     "cuvs==25.2.*,>=0.0.0a0",
+    "libraft==25.2.*,>=0.0.0a0",
     "nvidia-cublas",
     "nvidia-cufft",
     "nvidia-curand",

From e6f4d73ea0eec8a73c28bd8c7e6d26500757a018 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Wed, 8 Jan 2025 22:49:21 -0800
Subject: [PATCH 19/48] update handling of cumlprims_mg

---
 ci/build_wheel_cuml.sh         |  1 +
 ci/build_wheel_libcuml.sh      |  2 ++
 python/cuml/CMakeLists.txt     | 17 ++---------------
 python/libcuml/CMakeLists.txt  |  5 +++--
 python/libcuml/libcuml/load.py |  9 +++++++--
 5 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index 1ae21901ef..5a54c02f14 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -13,6 +13,7 @@ source ./ci/use_wheels_from_prs.sh
 
 EXCLUDE_ARGS=(
   --exclude "libcuml++.so"
+  --exclude "libcumlprims_mg.so"
   --exclude "libcuvs.so"
 )
 
diff --git a/ci/build_wheel_libcuml.sh b/ci/build_wheel_libcuml.sh
index 6fa5a77f60..0099672f85 100755
--- a/ci/build_wheel_libcuml.sh
+++ b/ci/build_wheel_libcuml.sh
@@ -28,7 +28,9 @@ python -m pip install \
 # 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735)
 export PIP_NO_BUILD_ISOLATION=0
 
+# TODO(jameslamb): it's weird to "exclude" libcumlprims_mg just as a way to stop auditwheel from complaining... when it really is in the wheel
 EXCLUDE_ARGS=(
+  --exclude "libcumlprims_mg.so"
   --exclude "libcuvs.so"
   --exclude "libraft.so"
 )
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 4f1a14a612..12f013dfd8 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -85,19 +85,6 @@ set(CUML_USE_TREELITE_STATIC ON)
 set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
 include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
 
-# get nlohmann_json and rapidsjson
-# TODO(jameslamb): maybe this isn't necessary?
-# function(copy_interface_excludes)
-#   set(_options "")
-#   set(_one_value TARGET INCLUDED_TARGET)
-#   set(_multi_value "")
-#   cmake_parse_arguments(_CUML_INCLUDES "${_options}" "${_one_value}"
-#                         "${_multi_value}" ${ARGN})
-#   get_target_property(_includes ${_CUML_INCLUDES_INCLUDED_TARGET} INTERFACE_INCLUDE_DIRECTORIES)
-#   target_include_directories(${_CUML_INCLUDES_TARGET} PUBLIC ${_includes})
-# endfunction()
-# copy_interface_excludes(INCLUDED_TARGET treelite::treelite_static TARGET ${CUML_CPP_TARGET})
-
 # --- cuvs --- #
 if(USE_CUVS_WHEEL)
   set(CUML_USE_CUVS_STATIC OFF)
@@ -113,10 +100,10 @@ set(CUML_EXCLUDE_CUVS_FROM_ALL ON)
 include(${CUML_CPP_SRC}/cmake/thirdparty/get_cuvs.cmake)
 
 # --- cumlprims_mg --- #
+# TODO(jameslamb): confirm that cumlprims_mg doesn't start getting installed in conda packages
+# dynamic cumlprims_mg should be found, not installed
 set(CUML_USE_CUMLPRIMS_MG_STATIC ON)
-# TODO(jameslamb): figure out how cumlprims_mg should work (re-download in cuml? only install/export the headers in libcuml?)
 set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL ON)
-include(${CUML_CPP_SRC}/cmake/thirdparty/get_cumlprims_mg.cmake)
 
 find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
 
diff --git a/python/libcuml/CMakeLists.txt b/python/libcuml/CMakeLists.txt
index d2b0ae12bc..4700dc2cff 100644
--- a/python/libcuml/CMakeLists.txt
+++ b/python/libcuml/CMakeLists.txt
@@ -71,9 +71,10 @@ set(CUML_RAFT_CLONE_ON_PIN OFF)
 set(CUML_EXCLUDE_RAFT_FROM_ALL ON)
 
 # --- cumlprims_mg --- #
-set(CUML_USE_CUMLPRIMS_MG_STATIC ON)
+# ship cumlprims_mg in the 'libcuml' wheel (for re-use by 'cuml' wheels)
+set(CUML_USE_CUMLPRIMS_MG_STATIC OFF)
 # TODO(jameslamb): figure out how cumlprims_mg should work (re-download in cuml? only install/export the headers in libcuml?)
-set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL ON)
+set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL OFF)
 
 # --- cuvs --- #
  
diff --git a/python/libcuml/libcuml/load.py b/python/libcuml/libcuml/load.py
index 3dbee92bfb..4226c48277 100644
--- a/python/libcuml/libcuml/load.py
+++ b/python/libcuml/libcuml/load.py
@@ -37,8 +37,13 @@ def _load_wheel_installation(soname: str):
 
     Returns ``None`` if the library cannot be loaded.
     """
+    # cumlprims_mg installs to lib/
+    if soname.startswith("libcumlprims_mg"):
+        relative_libdir = "lib"
+    else:
+        relative_libdir = "lib64"
     if os.path.isfile(
-        lib := os.path.join(os.path.dirname(__file__), "lib64", soname)
+        lib := os.path.join(os.path.dirname(__file__), relative_libdir, soname)
     ):
         return ctypes.CDLL(lib, PREFERRED_LOAD_FLAG)
     return None
@@ -72,7 +77,7 @@ def load_library():
     )
 
     # TODO(jameslamb): remove for loop?
-    for soname in ["libcuml++.so"]:
+    for soname in ["libcumlprims_mg.so", "libcuml++.so"]:
         libcuml_lib = None
         if prefer_system_installation:
             # Prefer a system library if one is present to

From a224fc1b9cf40a210985d9df22737bbf5d499ab5 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Thu, 9 Jan 2025 07:34:42 -0800
Subject: [PATCH 20/48] more todo

---
 python/cuml/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 12f013dfd8..81d0953df8 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -145,6 +145,7 @@ else()
   "--compile-time-env GPUBUILD=1")
 endif()
 
+# TODO(jameslamb): should SINGLEGPU move into libcuml?
 if(NOT SINGLEGPU)
   include("${CUML_CPP_SRC}/cmake/thirdparty/get_cumlprims_mg.cmake")
   set(cuml_mg_libraries

From 05f8fbf4e4d3ed024fb1fd8bfae8aaa591924cd9 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Fri, 10 Jan 2025 11:56:54 -0800
Subject: [PATCH 21/48] start unwinding CPM debugging changes

---
 .github/workflows/build.yaml        |  2 ++
 ci/build_wheel_libcuml.sh           |  2 +-
 cpp/cmake/thirdparty/get_cccl.cmake |  4 ++--
 cpp/cmake/thirdparty/get_rmm.cmake  |  4 ++--
 dependencies.yaml                   |  1 +
 python/cuml/CMakeLists.txt          | 26 +++++++++++++++-----------
 python/libcuml/CMakeLists.txt       |  9 ++++-----
 python/libcuml/libcuml/__init__.py  |  2 +-
 python/libcuml/libcuml/_version.py  |  2 +-
 python/libcuml/libcuml/load.py      | 15 ++++++++-------
 python/libcuml/pyproject.toml       |  2 +-
 11 files changed, 38 insertions(+), 31 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 2342af477b..1d2a285a68 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -87,6 +87,7 @@ jobs:
       sha: ${{ inputs.sha }}
       date: ${{ inputs.date }}
       package-name: libcuml
+      package-type: cpp
   wheel-build-cuml:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
@@ -112,3 +113,4 @@ jobs:
       sha: ${{ inputs.sha }}
       date: ${{ inputs.date }}
       package-name: cuml
+      package-type: python
diff --git a/ci/build_wheel_libcuml.sh b/ci/build_wheel_libcuml.sh
index 0099672f85..4ce0bd0dd0 100755
--- a/ci/build_wheel_libcuml.sh
+++ b/ci/build_wheel_libcuml.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2023-2025, NVIDIA CORPORATION.
+# Copyright (c) 2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
diff --git a/cpp/cmake/thirdparty/get_cccl.cmake b/cpp/cmake/thirdparty/get_cccl.cmake
index 991b4e4edd..3245929460 100644
--- a/cpp/cmake/thirdparty/get_cccl.cmake
+++ b/cpp/cmake/thirdparty/get_cccl.cmake
@@ -15,8 +15,8 @@
 # Use CPM to find or clone CCCL
 function(find_and_configure_cccl)
         include(${rapids-cmake-dir}/cpm/cccl.cmake)
-        # TODO(jameslamb): justify not exporting cccl?
-        rapids_cpm_cccl()
+        rapids_cpm_cccl(BUILD_EXPORT_SET cuml-exports
+                        INSTALL_EXPORT_SET cuml-exports)
 endfunction()
 
 find_and_configure_cccl()
diff --git a/cpp/cmake/thirdparty/get_rmm.cmake b/cpp/cmake/thirdparty/get_rmm.cmake
index f30204beaa..d69da49762 100644
--- a/cpp/cmake/thirdparty/get_rmm.cmake
+++ b/cpp/cmake/thirdparty/get_rmm.cmake
@@ -16,8 +16,8 @@
 
 function(find_and_configure_rmm)
     include(${rapids-cmake-dir}/cpm/rmm.cmake)
-    # TODO(jameslamb): justify not exporting rmm?
-    rapids_cpm_rmm()
+    rapids_cpm_rmm(BUILD_EXPORT_SET cuml-exports
+                   INSTALL_EXPORT_SET cuml-exports)
 endfunction()
 
 find_and_configure_rmm()
diff --git a/dependencies.yaml b/dependencies.yaml
index 65879e0b34..44c62e17d3 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -158,6 +158,7 @@ files:
       table: tool.rapids-build-backend
       key: requires
     includes:
+      # TODO(jameslamb): split out cuda-python and cython
       - common_build
       - depends_on_libraft
       - depends_on_librmm
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 81d0953df8..9172a1e1a3 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -19,7 +19,9 @@ include(../../rapids_config.cmake)
 option(CUML_CPU "Build only cuML CPU Python components." OFF)
 set(language_list "CXX")
 
+# TODO(jameslamb): can this be removed?
 if(NOT CUML_CPU)
+  # TODO(jameslamb): where does rapids_cuda_init_architectures belong?
   # We always need CUDA for cuML GPU because the raft dependency brings in a
   # header-only cuco dependency that enables CUDA unconditionally.
   include(rapids-cuda)
@@ -38,7 +40,6 @@ project(
 option(CUML_UNIVERSAL "Build all cuML Python components." ON)
 option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF)
 option(USE_CUVS_WHEEL "Use the cuVS wheel" OFF)
-set(CUML_RAFT_CLONE_ON_PIN OFF)
 
 # todo: use CMAKE_MESSAGE_CONTEXT for prefix for logging.
 # https://github.com/rapidsai/cuml/issues/4843
@@ -69,15 +70,20 @@ rapids_cpm_init()
 #
 # include(cmake/thirdparty/get_cccl.cmake)
 include(${CUML_CPP_SRC}/cmake/thirdparty/get_rmm.cmake)
+
+# --- raft --- #
+set(CUML_RAFT_CLONE_ON_PIN OFF)
+set(CUML_EXCLUDE_RAFT_FROM_ALL ON)
 include(${CUML_CPP_SRC}/cmake/thirdparty/get_raft.cmake)
 
 # --- treelite --- #
 # We need to call get_treelite explicitly because we need the correct
-# ${TREELITE_LIBS} definition for RF
+# ${TREELITE_LIBS} definition for RF.
+#
+# And because cuml Cython code needs to headers to satisfy calls like 'cdef extern from "treelite/c_api.h"'
 #
 # and it needs to come before find_package(cuml), because it's a PUBLIC
 # dependency of cuml::cuml
-set(CUML_EXPORT_TREELITE_LINKAGE ON)
 set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
 # TODO(jameslamb): is it safe for libcuml and cuml to both use their own static treelite?
 set(CUML_USE_TREELITE_STATIC ON)
@@ -97,18 +103,19 @@ endif()
 set(CUML_EXCLUDE_CUVS_FROM_ALL ON)
 
 # download this again just to get the headers
+# TODO(jameslamb): prevent re-compilation of cuvs here
+# set(BUILD_SHARED_LIBS OFF)
+# set(BUILD_C_LIBRARY OFF)
 include(${CUML_CPP_SRC}/cmake/thirdparty/get_cuvs.cmake)
 
 # --- cumlprims_mg --- #
 # TODO(jameslamb): confirm that cumlprims_mg doesn't start getting installed in conda packages
 # dynamic cumlprims_mg should be found, not installed
-set(CUML_USE_CUMLPRIMS_MG_STATIC ON)
+set(CUML_USE_CUMLPRIMS_MG_STATIC OFF)
 set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL ON)
 
 find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
 
-include(rapids-cython-core)
-
 # cuml-cpu does not need libcuml++.so
 # TODO(jameslamb): maybe all the stuff above is unnecessary in the CPU-only case?
 if(NOT CUML_CPU)
@@ -160,7 +167,7 @@ endif()
 include("${CUML_CPP_SRC}/cmake/modules/ConfigureAlgorithms.cmake")
 include(cmake/ConfigureCythonAlgorithms.cmake)
 
-if(${CUML_CPU})
+if(CUML_CPU)
   # libcuml requires metrics built if HDSCAN is built, which is not the case
   # for cuml-cpu
   unset(metrics_algo)
@@ -168,6 +175,7 @@ endif()
 
 message(VERBOSE "CUML_PY: Building cuML with algorithms: '${CUML_ALGORITHMS}'.")
 
+include(rapids-cython-core)
 rapids_cython_init()
 
 add_subdirectory(cuml/common)
@@ -192,7 +200,3 @@ add_subdirectory(cuml/svm)
 add_subdirectory(cuml/tsa)
 
 add_subdirectory(cuml/experimental/linear_model)
-
-if(USE_CUVS_WHEEL)
-  rapids_cython_add_rpath_entries(TARGET cuml PATHS cuvs)
-endif()
diff --git a/python/libcuml/CMakeLists.txt b/python/libcuml/CMakeLists.txt
index 4700dc2cff..de8605e012 100644
--- a/python/libcuml/CMakeLists.txt
+++ b/python/libcuml/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# Copyright (c) 2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -24,7 +24,6 @@ project(
 
 ################################################################################
 # - User Options  --------------------------------------------------------------
-option(CUML_UNIVERSAL "Build all cuML Python components." ON)
 option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF)
 option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF)
 option(USE_CUVS_WHEEL "Use the cuVS wheel" OFF)
@@ -33,6 +32,8 @@ option(USE_CUVS_WHEEL "Use the cuVS wheel" OFF)
 # https://github.com/rapidsai/cuml/issues/4843
 message(VERBOSE "CUML_PY: Disabling all mnmg components and comms libraries: ${SINGLEGPU}")
 
+# In libcuml wheels, we always want to build in all cuML algorithms.
+# This is the default in cpp/CMakeLists.txt, but just making that choice for wheels explicit here.
 set(CUML_ALGORITHMS "ALL" CACHE STRING "Choose which algorithms are built cuML. Can specify individual algorithms or groups in a semicolon-separated list.")
 
 set(CUML_CPP_TARGET "cuml++")
@@ -77,7 +78,6 @@ set(CUML_USE_CUMLPRIMS_MG_STATIC OFF)
 set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL OFF)
 
 # --- cuvs --- #
- 
 if(USE_CUVS_WHEEL)
   set(CUML_USE_CUVS_STATIC OFF)
 else()
@@ -102,12 +102,11 @@ else()
   set(CUDA_STATIC_MATH_LIBRARIES ON)
 endif()
 
-# TODO(jameslamb): keep cuvs files (like lib64/libcuvs.{a,so}) out of the wheel
-# TODO(jameslamb): audit all file contents
 add_subdirectory(../../cpp cuml-cpp)
 
 if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS)
   # assumes libcuml++ is installed 2 levels deep, e.g. site-packages/cuml/lib64/libcuml++.so
+  # TODO(jameslamb): is nvjitlink even necessary?
   set(rpaths
     "$ORIGIN/../../nvidia/cublas/lib"
     "$ORIGIN/../../nvidia/cufft/lib"
diff --git a/python/libcuml/libcuml/__init__.py b/python/libcuml/libcuml/__init__.py
index 73d050e883..69d95c8423 100644
--- a/python/libcuml/libcuml/__init__.py
+++ b/python/libcuml/libcuml/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# Copyright (c) 2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/libcuml/libcuml/_version.py b/python/libcuml/libcuml/_version.py
index 09fcb13f3d..da66c0d576 100644
--- a/python/libcuml/libcuml/_version.py
+++ b/python/libcuml/libcuml/_version.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# Copyright (c) 2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/libcuml/libcuml/load.py b/python/libcuml/libcuml/load.py
index 4226c48277..0b51ec00f7 100644
--- a/python/libcuml/libcuml/load.py
+++ b/python/libcuml/libcuml/load.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# Copyright (c) 2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -76,7 +76,7 @@ def load_library():
         != "false"
     )
 
-    # TODO(jameslamb): remove for loop?
+    libs_to_return = []
     for soname in ["libcumlprims_mg.so", "libcuml++.so"]:
         libcuml_lib = None
         if prefer_system_installation:
@@ -100,10 +100,11 @@ def load_library():
                 # and rely on other mechanisms (like RPATHs on other DSOs) to
                 # help the loader find the library.
                 pass
+        if libcuml_lib:
+            libs_to_return.append(libcuml_lib)
 
-    # The caller almost never needs to do anything with this library, but no
-    # harm in offering the option since this object at least provides a handle
-    # to inspect where libcuml was loaded from.
+    # The caller almost never needs to do anything with these libraries, but no
+    # harm in offering the option since these objects at least provide handles
+    # to inspect where libcuml was loaded from.
 
-    # TODO(jameslamb): return something here?
-    return libcuml_lib
+    return libs_to_return
diff --git a/python/libcuml/pyproject.toml b/python/libcuml/pyproject.toml
index 2619fa46a4..e0df96779b 100644
--- a/python/libcuml/pyproject.toml
+++ b/python/libcuml/pyproject.toml
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From 8f505e38d688ac3f2f8b61eed4b08564f5a1937d Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Fri, 10 Jan 2025 12:42:39 -0800
Subject: [PATCH 22/48] undo exporting

---
 cpp/cmake/thirdparty/get_cccl.cmake         | 2 +-
 cpp/cmake/thirdparty/get_cumlprims_mg.cmake | 3 ++-
 cpp/cmake/thirdparty/get_cuvs.cmake         | 4 +++-
 cpp/cmake/thirdparty/get_raft.cmake         | 3 ++-
 cpp/cmake/thirdparty/get_rmm.cmake          | 2 +-
 5 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/cpp/cmake/thirdparty/get_cccl.cmake b/cpp/cmake/thirdparty/get_cccl.cmake
index 3245929460..0c126e320e 100644
--- a/cpp/cmake/thirdparty/get_cccl.cmake
+++ b/cpp/cmake/thirdparty/get_cccl.cmake
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2023-2025, NVIDIA CORPORATION.
+# Copyright (c) 2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/cpp/cmake/thirdparty/get_cumlprims_mg.cmake b/cpp/cmake/thirdparty/get_cumlprims_mg.cmake
index 0799f6fe5f..543e25101e 100644
--- a/cpp/cmake/thirdparty/get_cumlprims_mg.cmake
+++ b/cpp/cmake/thirdparty/get_cumlprims_mg.cmake
@@ -35,9 +35,10 @@ function(find_and_configure_cumlprims_mg)
       set(CUMLPRIMS_MG_BUILD_SHARED_LIBS OFF)
     endif()
 
-    # TODO(jameslamb): justify not exporting cumlprims?
     rapids_cpm_find(cumlprims_mg ${PKG_VERSION}
       GLOBAL_TARGETS      cumlprims_mg::cumlprims_mg
+      BUILD_EXPORT_SET    cuml-exports
+      INSTALL_EXPORT_SET  cuml-exports
         CPM_ARGS
           GIT_REPOSITORY git@github.com:${PKG_FORK}/cumlprims_mg.git
           GIT_TAG        ${PKG_PINNED_TAG}
diff --git a/cpp/cmake/thirdparty/get_cuvs.cmake b/cpp/cmake/thirdparty/get_cuvs.cmake
index 03673ce6c8..6c3f7895af 100644
--- a/cpp/cmake/thirdparty/get_cuvs.cmake
+++ b/cpp/cmake/thirdparty/get_cuvs.cmake
@@ -43,9 +43,10 @@ function(find_and_configure_cuvs)
       set(CUVS_BUILD_MG_ALGOS OFF)
     endif()
 
-    # TODO(jameslamb): is not exporting cuvs ok here?
     rapids_cpm_find(cuvs ${PKG_VERSION}
       GLOBAL_TARGETS      cuvs::cuvs
+      BUILD_EXPORT_SET    cuml-exports
+      INSTALL_EXPORT_SET  cuml-exports
       CPM_ARGS
         GIT_REPOSITORY         https://github.com/${PKG_FORK}/cuvs.git
         GIT_TAG                ${PKG_PINNED_TAG}
@@ -56,6 +57,7 @@ function(find_and_configure_cuvs)
           "BUILD_CAGRA_HNSWLIB OFF"
           "BUILD_CUVS_BENCH OFF"
           "BUILD_MG_ALGOS ${CUVS_BUILD_MG_ALGOS}"
+
     )
 
     if(cuvs_ADDED)
diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake
index b9468ab834..f6e87f8ba3 100644
--- a/cpp/cmake/thirdparty/get_raft.cmake
+++ b/cpp/cmake/thirdparty/get_raft.cmake
@@ -42,9 +42,10 @@ function(find_and_configure_raft)
 
     message(VERBOSE "CUML: raft FIND_PACKAGE_ARGUMENTS COMPONENTS ${RAFT_COMPONENTS}")
 
-    # TODO(jameslamb): justify not exporting raft?
     rapids_cpm_find(raft ${PKG_VERSION}
       GLOBAL_TARGETS      raft::raft raft::raft_logger raft::raft_logger_impl
+      BUILD_EXPORT_SET    cuml-exports
+      INSTALL_EXPORT_SET  cuml-exports
       COMPONENTS          ${RAFT_COMPONENTS}
       CPM_ARGS
         GIT_REPOSITORY         https://github.com/${PKG_FORK}/raft.git
diff --git a/cpp/cmake/thirdparty/get_rmm.cmake b/cpp/cmake/thirdparty/get_rmm.cmake
index d69da49762..35968f7245 100644
--- a/cpp/cmake/thirdparty/get_rmm.cmake
+++ b/cpp/cmake/thirdparty/get_rmm.cmake
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2023-2025, NVIDIA CORPORATION.
+# Copyright (c) 2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From 829d71463cbb46a4d5b7a7ad5a55850a50844858 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 13 Jan 2025 08:05:49 -0800
Subject: [PATCH 23/48] add debugging prints, update RAFT commit

---
 ci/use_conda_packages_from_prs.sh |   2 +-
 ci/use_wheels_from_prs.sh         |   2 +-
 python/cuml/CMakeLists.txt        | 131 +++++++++++++++---------------
 3 files changed, 68 insertions(+), 67 deletions(-)

diff --git a/ci/use_conda_packages_from_prs.sh b/ci/use_conda_packages_from_prs.sh
index 90293b71fa..ac17020687 100644
--- a/ci/use_conda_packages_from_prs.sh
+++ b/ci/use_conda_packages_from_prs.sh
@@ -1,7 +1,7 @@
 # Copyright (c) 2025, NVIDIA CORPORATION.
 # TODO(jameslamb): remove this file when https://github.com/rapidsai/raft/pull/2531 is merged
 
-RAFT_COMMIT="345f0e556b602ec65b5eebe825ffd000d61706fe"
+RAFT_COMMIT="d275c995fb51310d1340fe2fd6d63d0bfd43cafa"
 
 RAFT_CPP_CHANNEL=$(rapids-get-pr-conda-artifact raft 2531 cpp "${RAFT_COMMIT:0:7}")
 RAFT_PYTHON_CHANNEL=$(rapids-get-pr-conda-artifact raft 2531 python "${RAFT_COMMIT:0:7}")
diff --git a/ci/use_wheels_from_prs.sh b/ci/use_wheels_from_prs.sh
index 66b002dd4f..ce5b5eb8a2 100644
--- a/ci/use_wheels_from_prs.sh
+++ b/ci/use_wheels_from_prs.sh
@@ -3,7 +3,7 @@
 
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
-RAFT_COMMIT="345f0e556b602ec65b5eebe825ffd000d61706fe"
+RAFT_COMMIT="d275c995fb51310d1340fe2fd6d63d0bfd43cafa"
 LIBRAFT_CHANNEL=$(
   RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact raft 2531 cpp "${RAFT_COMMIT:0:7}"
 )
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 9172a1e1a3..e06e183e30 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -19,7 +19,7 @@ include(../../rapids_config.cmake)
 option(CUML_CPU "Build only cuML CPU Python components." OFF)
 set(language_list "CXX")
 
-# TODO(jameslamb): can this be removed?
+# TODO(jameslamb): can this be removed? (I think so?)
 if(NOT CUML_CPU)
   # TODO(jameslamb): where does rapids_cuda_init_architectures belong?
   # We always need CUDA for cuML GPU because the raft dependency brings in a
@@ -54,72 +54,73 @@ set(CUML_CPP_SRC "../../cpp")
 ################################################################################
 # - Process User Options  ------------------------------------------------------
 
-include(rapids-cpm)
-include(rapids-export)
-rapids_cpm_init()
-
-# --- CCCL, RAFT, and RMM --- #
-# CCCL before RMM, and RMM before RAFT
-# TODO(jameslamb): skipping CCCL because we're already getting it from libraft... is that ok?
-#   -- Found CCCL: /pyenv/versions/3.12.8/lib/python3.12/site-packages/libraft/lib64/rapids/cmake/cccl/cccl-config.cmake (found version "2.7.0.0")
-#     CMake Error at /pyenv/versions/3.12.8/lib/python3.12/site-packages/libraft/lib64/cmake/NvidiaCutlass/NvidiaCutlassTargets.cmake:42 (message):
-#     Some (but not all) targets in this export set were already defined.
-#
-#     Targets Defined: nvidia::cutlass::cutlass
-#     Targets not yet defined: nvidia::cutlass::tools::util
-#
-# include(cmake/thirdparty/get_cccl.cmake)
-include(${CUML_CPP_SRC}/cmake/thirdparty/get_rmm.cmake)
-
-# --- raft --- #
-set(CUML_RAFT_CLONE_ON_PIN OFF)
-set(CUML_EXCLUDE_RAFT_FROM_ALL ON)
-include(${CUML_CPP_SRC}/cmake/thirdparty/get_raft.cmake)
-
-# --- treelite --- #
-# We need to call get_treelite explicitly because we need the correct
-# ${TREELITE_LIBS} definition for RF.
-#
-# And because cuml Cython code needs to headers to satisfy calls like 'cdef extern from "treelite/c_api.h"'
-#
-# and it needs to come before find_package(cuml), because it's a PUBLIC
-# dependency of cuml::cuml
-set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
-# TODO(jameslamb): is it safe for libcuml and cuml to both use their own static treelite?
-set(CUML_USE_TREELITE_STATIC ON)
-# TODO(jameslamb): should treelite just be included here?
-set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
-include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
-
-# --- cuvs --- #
-if(USE_CUVS_WHEEL)
-  set(CUML_USE_CUVS_STATIC OFF)
-else()
-  set(CUML_USE_CUVS_STATIC ON)
-endif()
-
-# either way, don't unclude any cuvs stuff in wheel
-# (expect anything building against libcuml to provide cuvs headers externally)
-set(CUML_EXCLUDE_CUVS_FROM_ALL ON)
-
-# download this again just to get the headers
-# TODO(jameslamb): prevent re-compilation of cuvs here
-# set(BUILD_SHARED_LIBS OFF)
-# set(BUILD_C_LIBRARY OFF)
-include(${CUML_CPP_SRC}/cmake/thirdparty/get_cuvs.cmake)
-
-# --- cumlprims_mg --- #
-# TODO(jameslamb): confirm that cumlprims_mg doesn't start getting installed in conda packages
-# dynamic cumlprims_mg should be found, not installed
-set(CUML_USE_CUMLPRIMS_MG_STATIC OFF)
-set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL ON)
-
-find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
-
 # cuml-cpu does not need libcuml++.so
-# TODO(jameslamb): maybe all the stuff above is unnecessary in the CPU-only case?
 if(NOT CUML_CPU)
-  # (moved to libcuml/CMakeLists.txt)
+
+  include(rapids-cpm)
+  include(rapids-export)
+  rapids_cpm_init()
+
+  # re-find CCCL, RMM, and RAFT before cuVS
+  include(cmake/thirdparty/get_cccl.cmake)
+  include(cmake/thirdparty/get_rmm.cmake)
+  include(cmake/thirdparty/get_raft.cmake)
+
+  # --- cuvs --- #
+  # Once there are 'libcuvs' wheels, it should be possible to remove this CPM build of cuvs.
+  #
+  #   * conda builds will find 'libcuvs' in the build environment
+  #   * wheel builds will find 'libcuvs' wherever that wheel is installed
+  #
+  # Until then, this is necessary because cuVS is included in the public headers of both
+  # libcuml (C++) and cuml (Cython).
+  message(STATUS "--- [debug] finding cuVS for cuML")
+  set(CUML_USE_CUVS_STATIC OFF)
+  set(CUML_EXCLUDE_CUVS_FROM_ALL ON)
+  include(${CUML_CPP_SRC}/cmake/thirdparty/get_cuvs.cmake)
+  message(STATUS "--- [debug] done finding cuVS for cuML")
+
+  find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
+  
+  # --- CCCL, RAFT, and RMM --- #
+  # CCCL before RMM, and RMM before RAFT
+  # TODO(jameslamb): skipping CCCL because we're already getting it from libraft... is that ok?
+  #   -- Found CCCL: /pyenv/versions/3.12.8/lib/python3.12/site-packages/libraft/lib64/rapids/cmake/cccl/cccl-config.cmake (found version "2.7.0.0")
+  #     CMake Error at /pyenv/versions/3.12.8/lib/python3.12/site-packages/libraft/lib64/cmake/NvidiaCutlass/NvidiaCutlassTargets.cmake:42 (message):
+  #     Some (but not all) targets in this export set were already defined.
+  #
+  #     Targets Defined: nvidia::cutlass::cutlass
+  #     Targets not yet defined: nvidia::cutlass::tools::util
+  #
+  # include(cmake/thirdparty/get_cccl.cmake)
+  include(${CUML_CPP_SRC}/cmake/thirdparty/get_rmm.cmake)
+  
+  # --- raft --- #
+  # TODO(jameslamb): the 'libraft' wheel does 
+  set(CUML_RAFT_CLONE_ON_PIN OFF)
+  set(CUML_EXCLUDE_RAFT_FROM_ALL ON)
+  include(${CUML_CPP_SRC}/cmake/thirdparty/get_raft.cmake)
+  
+  # --- treelite --- #
+  # We need to call get_treelite explicitly because we need the correct
+  # ${TREELITE_LIBS} definition for RF.
+  #
+  # And because cuml Cython code needs to headers to satisfy calls like 'cdef extern from "treelite/c_api.h"'
+  #
+  # and it needs to come before find_package(cuml), because it's a PUBLIC
+  # dependency of cuml::cuml
+  set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
+  # TODO(jameslamb): is it safe for libcuml and cuml to both use their own static treelite?
+  set(CUML_USE_TREELITE_STATIC ON)
+  # TODO(jameslamb): should treelite just be included here?
+  set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
+  include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
+
+  # --- cumlprims_mg --- #
+  # TODO(jameslamb): confirm that cumlprims_mg doesn't start getting installed in conda packages
+  # dynamic cumlprims_mg should be found, not installed
+  set(CUML_USE_CUMLPRIMS_MG_STATIC OFF)
+  set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL ON)
 endif()
 
 if(CUML_CPU)

From 8151e6237031af95a530b04c78271b61aaf6dcb4 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 13 Jan 2025 09:00:47 -0800
Subject: [PATCH 24/48] this is working

---
 python/cuml/CMakeLists.txt | 43 ++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index e06e183e30..f27fd2f0a7 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -62,9 +62,12 @@ if(NOT CUML_CPU)
   rapids_cpm_init()
 
   # re-find CCCL, RMM, and RAFT before cuVS
-  include(cmake/thirdparty/get_cccl.cmake)
-  include(cmake/thirdparty/get_rmm.cmake)
-  include(cmake/thirdparty/get_raft.cmake)
+  message(STATUS "--- [debug] finding CCCL for cuML")
+  include(${CUML_CPP_SRC}/cmake/thirdparty/get_cccl.cmake)
+  message(STATUS "--- [debug] finding RMM for cuML")
+  include(${CUML_CPP_SRC}/cmake/thirdparty/get_rmm.cmake)
+  message(STATUS "--- [debug] finding RAFT for cuML")
+  include(${CUML_CPP_SRC}/cmake/thirdparty/get_raft.cmake)
 
   # --- cuvs --- #
   # Once there are 'libcuvs' wheels, it should be possible to remove this CPM build of cuvs.
@@ -80,7 +83,26 @@ if(NOT CUML_CPU)
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_cuvs.cmake)
   message(STATUS "--- [debug] done finding cuVS for cuML")
 
+  # --- treelite --- #
+  # We need to call get_treelite explicitly because we need the correct
+  # ${TREELITE_LIBS} definition for RF.
+  #
+  # And because cuml Cython code needs to headers to satisfy calls like 'cdef extern from "treelite/c_api.h"'
+  #
+  # and it needs to come before find_package(cuml), because it's a PUBLIC
+  # dependency of cuml::cuml
+  set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
+  # TODO(jameslamb): is it safe for libcuml and cuml to both use their own static treelite?
+  set(CUML_USE_TREELITE_STATIC ON)
+  # TODO(jameslamb): should treelite just be included here?
+  set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
+  message(STATUS "--- [debug] finding treelite for cuML")
+  include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
+  message(STATUS "--- [debug] done finding treelite for cuML")
+
+  message(STATUS "--- [debug] finding cuML")
   find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
+  message(STATUS "--- [debug] found cuML")
   
   # --- CCCL, RAFT, and RMM --- #
   # CCCL before RMM, and RMM before RAFT
@@ -101,21 +123,6 @@ if(NOT CUML_CPU)
   set(CUML_EXCLUDE_RAFT_FROM_ALL ON)
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_raft.cmake)
   
-  # --- treelite --- #
-  # We need to call get_treelite explicitly because we need the correct
-  # ${TREELITE_LIBS} definition for RF.
-  #
-  # And because cuml Cython code needs to headers to satisfy calls like 'cdef extern from "treelite/c_api.h"'
-  #
-  # and it needs to come before find_package(cuml), because it's a PUBLIC
-  # dependency of cuml::cuml
-  set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
-  # TODO(jameslamb): is it safe for libcuml and cuml to both use their own static treelite?
-  set(CUML_USE_TREELITE_STATIC ON)
-  # TODO(jameslamb): should treelite just be included here?
-  set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
-  include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
-
   # --- cumlprims_mg --- #
   # TODO(jameslamb): confirm that cumlprims_mg doesn't start getting installed in conda packages
   # dynamic cumlprims_mg should be found, not installed

From cae868e58001afc5dc88a6d2cf7ddfe5a5368041 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 13 Jan 2025 09:27:45 -0800
Subject: [PATCH 25/48] revert some debugging changes

---
 python/cuml/CMakeLists.txt | 32 +++-----------------------------
 1 file changed, 3 insertions(+), 29 deletions(-)

diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index f27fd2f0a7..630f577cd0 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -61,12 +61,10 @@ if(NOT CUML_CPU)
   include(rapids-export)
   rapids_cpm_init()
 
-  # re-find CCCL, RMM, and RAFT before cuVS
-  message(STATUS "--- [debug] finding CCCL for cuML")
+  # find CCCL, RMM, and RAFT before cuVS, to avoid
+  # cuVS CMake defining conflicting versions of targets like 'nvidia::cutlaass'
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_cccl.cmake)
-  message(STATUS "--- [debug] finding RMM for cuML")
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_rmm.cmake)
-  message(STATUS "--- [debug] finding RAFT for cuML")
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_raft.cmake)
 
   # --- cuvs --- #
@@ -103,31 +101,7 @@ if(NOT CUML_CPU)
   message(STATUS "--- [debug] finding cuML")
   find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
   message(STATUS "--- [debug] found cuML")
-  
-  # --- CCCL, RAFT, and RMM --- #
-  # CCCL before RMM, and RMM before RAFT
-  # TODO(jameslamb): skipping CCCL because we're already getting it from libraft... is that ok?
-  #   -- Found CCCL: /pyenv/versions/3.12.8/lib/python3.12/site-packages/libraft/lib64/rapids/cmake/cccl/cccl-config.cmake (found version "2.7.0.0")
-  #     CMake Error at /pyenv/versions/3.12.8/lib/python3.12/site-packages/libraft/lib64/cmake/NvidiaCutlass/NvidiaCutlassTargets.cmake:42 (message):
-  #     Some (but not all) targets in this export set were already defined.
-  #
-  #     Targets Defined: nvidia::cutlass::cutlass
-  #     Targets not yet defined: nvidia::cutlass::tools::util
-  #
-  # include(cmake/thirdparty/get_cccl.cmake)
-  include(${CUML_CPP_SRC}/cmake/thirdparty/get_rmm.cmake)
-  
-  # --- raft --- #
-  # TODO(jameslamb): the 'libraft' wheel does 
-  set(CUML_RAFT_CLONE_ON_PIN OFF)
-  set(CUML_EXCLUDE_RAFT_FROM_ALL ON)
-  include(${CUML_CPP_SRC}/cmake/thirdparty/get_raft.cmake)
-  
-  # --- cumlprims_mg --- #
-  # TODO(jameslamb): confirm that cumlprims_mg doesn't start getting installed in conda packages
-  # dynamic cumlprims_mg should be found, not installed
-  set(CUML_USE_CUMLPRIMS_MG_STATIC OFF)
-  set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL ON)
+
 endif()
 
 if(CUML_CPU)

From def5a4db6fc92fc0ff6f9ab8332624bbab003619 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 13 Jan 2025 10:57:51 -0800
Subject: [PATCH 26/48] use static treelite

---
 ci/build_wheel_cuml.sh        |  2 +-
 ci/build_wheel_libcuml.sh     |  2 +-
 python/cuml/CMakeLists.txt    | 61 ++++++++++++++---------------------
 python/libcuml/CMakeLists.txt | 30 ++++++++---------
 4 files changed, 40 insertions(+), 55 deletions(-)

diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index 5a54c02f14..d51f8955d7 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -36,7 +36,7 @@ case "${RAPIDS_CUDA_VERSION}" in
     ;;
 esac
 
-export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS}"
+export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS};-DSINGLEGPU=OFF"
 ./ci/build_wheel.sh "${package_name}" "${package_dir}"
 
 mkdir -p ${package_dir}/final_dist
diff --git a/ci/build_wheel_libcuml.sh b/ci/build_wheel_libcuml.sh
index 4ce0bd0dd0..f8f0a41b77 100755
--- a/ci/build_wheel_libcuml.sh
+++ b/ci/build_wheel_libcuml.sh
@@ -53,7 +53,7 @@ case "${RAPIDS_CUDA_VERSION}" in
     ;;
 esac
 
-export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS}"
+export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS};-DSINGLEGPU=OFF"
 ./ci/build_wheel.sh "${package_name}" "${package_dir}"
 
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 630f577cd0..25f4be2286 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -54,8 +54,32 @@ set(CUML_CPP_SRC "../../cpp")
 ################################################################################
 # - Process User Options  ------------------------------------------------------
 
+if(CUML_CPU)
+  set(CUML_UNIVERSAL OFF)
+  set(SINGLEGPU ON)
+
+  # only a subset of algorithms are supported in CPU-only cuML
+  set(CUML_ALGORITHMS "linearregression")
+  list(APPEND CUML_ALGORITHMS "pca")
+  list(APPEND CUML_ALGORITHMS "tsvd")
+  list(APPEND CUML_ALGORITHMS "elasticnet")
+  list(APPEND CUML_ALGORITHMS "logisticregression")
+  list(APPEND CUML_ALGORITHMS "ridge")
+  list(APPEND CUML_ALGORITHMS "lasso")
+  list(APPEND CUML_ALGORITHMS "umap")
+  list(APPEND CUML_ALGORITHMS "knn")
+  list(APPEND CUML_ALGORITHMS "hdbscan")
+  list(APPEND CUML_ALGORITHMS "dbscan")
+  list(APPEND CUML_ALGORITHMS "kmeans")
+
+  # this won't be needed when we add CPU libcuml++ (FIL)
+  set(cuml_sg_libraries "")
+
+  list(APPEND CYTHON_FLAGS
+  "--compile-time-env GPUBUILD=0")
+
 # cuml-cpu does not need libcuml++.so
-if(NOT CUML_CPU)
+else()
 
   include(rapids-cpm)
   include(rapids-export)
@@ -92,7 +116,6 @@ if(NOT CUML_CPU)
   set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
   # TODO(jameslamb): is it safe for libcuml and cuml to both use their own static treelite?
   set(CUML_USE_TREELITE_STATIC ON)
-  # TODO(jameslamb): should treelite just be included here?
   set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
   message(STATUS "--- [debug] finding treelite for cuML")
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
@@ -102,31 +125,6 @@ if(NOT CUML_CPU)
   find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
   message(STATUS "--- [debug] found cuML")
 
-endif()
-
-if(CUML_CPU)
-  set(CUML_UNIVERSAL OFF)
-  set(SINGLEGPU ON)
-
-  set(CUML_ALGORITHMS "linearregression")
-  list(APPEND CUML_ALGORITHMS "pca")
-  list(APPEND CUML_ALGORITHMS "tsvd")
-  list(APPEND CUML_ALGORITHMS "elasticnet")
-  list(APPEND CUML_ALGORITHMS "logisticregression")
-  list(APPEND CUML_ALGORITHMS "ridge")
-  list(APPEND CUML_ALGORITHMS "lasso")
-  list(APPEND CUML_ALGORITHMS "umap")
-  list(APPEND CUML_ALGORITHMS "knn")
-  list(APPEND CUML_ALGORITHMS "hdbscan")
-  list(APPEND CUML_ALGORITHMS "dbscan")
-  list(APPEND CUML_ALGORITHMS "kmeans")
-
-  # this won't be needed when we add CPU libcuml++ (FIL)
-  set(cuml_sg_libraries "")
-
-  list(APPEND CYTHON_FLAGS
-  "--compile-time-env GPUBUILD=0")
-else()
   set(cuml_sg_libraries cuml::${CUML_CPP_TARGET})
   set(cuml_mg_libraries cuml::${CUML_CPP_TARGET})
 
@@ -134,15 +132,6 @@ else()
   "--compile-time-env GPUBUILD=1")
 endif()
 
-# TODO(jameslamb): should SINGLEGPU move into libcuml?
-if(NOT SINGLEGPU)
-  include("${CUML_CPP_SRC}/cmake/thirdparty/get_cumlprims_mg.cmake")
-  set(cuml_mg_libraries
-    cuml::${CUML_CPP_TARGET}
-    cumlprims_mg::cumlprims_mg
-  )
-endif()
-
  ################################################################################
  # - Build Cython artifacts -----------------------------------------------------
 
diff --git a/python/libcuml/CMakeLists.txt b/python/libcuml/CMakeLists.txt
index de8605e012..8509ecf682 100644
--- a/python/libcuml/CMakeLists.txt
+++ b/python/libcuml/CMakeLists.txt
@@ -52,42 +52,39 @@ endif()
 
 unset(cuml_FOUND)
 
+# --- cuML --- #
 set(BUILD_CUML_TESTS OFF)
 set(BUILD_PRIMS_TESTS OFF)
 set(BUILD_CUML_C_LIBRARY OFF)
 set(BUILD_CUML_EXAMPLES OFF)
 set(BUILD_CUML_BENCH OFF)
 
-# --- treelite --- #
-set(CUML_EXPORT_TREELITE_LINKAGE ON)
-set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
-# use dynamic treelite (provided by treelite wheels)
-# TODO(jameslamb): is it safe for libcuml and cuml to both use their own static treelite?
-set(CUML_USE_TREELITE_STATIC ON)
-# TODO(jameslamb): should treelite just be included here?
-set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
-
-# --- raft --- #
-set(CUML_RAFT_CLONE_ON_PIN OFF)
-set(CUML_EXCLUDE_RAFT_FROM_ALL ON)
-
 # --- cumlprims_mg --- #
 # ship cumlprims_mg in the 'libcuml' wheel (for re-use by 'cuml' wheels)
 set(CUML_USE_CUMLPRIMS_MG_STATIC OFF)
-# TODO(jameslamb): figure out how cumlprims_mg should work (re-download in cuml? only install/export the headers in libcuml?)
 set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL OFF)
 
-# --- cuvs --- #
+# --- cuVS --- #
 if(USE_CUVS_WHEEL)
   set(CUML_USE_CUVS_STATIC OFF)
 else()
   set(CUML_USE_CUVS_STATIC ON)
 endif()
 
-# either way, don't unclude any cuvs stuff in wheel
+# don't include any cuVS stuff in libcuml wheel
 # (expect anything building against libcuml to provide cuvs headers externally)
 set(CUML_EXCLUDE_CUVS_FROM_ALL ON)
 
+# --- raft --- #
+set(CUML_RAFT_CLONE_ON_PIN OFF)
+set(CUML_EXCLUDE_RAFT_FROM_ALL ON)
+
+# --- treelite --- #
+set(CUML_EXPORT_TREELITE_LINKAGE ON)
+set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
+set(CUML_USE_TREELITE_STATIC ON)
+set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
+
 # --- CUDA --- #
 set(CUDA_STATIC_RUNTIME ON)
 
@@ -106,7 +103,6 @@ add_subdirectory(../../cpp cuml-cpp)
 
 if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS)
   # assumes libcuml++ is installed 2 levels deep, e.g. site-packages/cuml/lib64/libcuml++.so
-  # TODO(jameslamb): is nvjitlink even necessary?
   set(rpaths
     "$ORIGIN/../../nvidia/cublas/lib"
     "$ORIGIN/../../nvidia/cufft/lib"

From 83b28d96af3e25183e5b64dbad107da5d81bed0e Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 13 Jan 2025 11:17:57 -0800
Subject: [PATCH 27/48] bigger pydistcheck threshold, remove cuvs from build
 env

---
 dependencies.yaml             | 1 -
 python/cuml/pyproject.toml    | 4 +---
 python/libcuml/pyproject.toml | 3 +++
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/dependencies.yaml b/dependencies.yaml
index 44c62e17d3..6d1f5975a7 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -110,7 +110,6 @@ files:
       key: requires
     includes:
       - common_build
-      - depends_on_cuvs
       - depends_on_libcuml
       - depends_on_libcumlprims
       - depends_on_libraft
diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml
index b0e6137c74..313a5d5d1b 100644
--- a/python/cuml/pyproject.toml
+++ b/python/cuml/pyproject.toml
@@ -177,13 +177,11 @@ versioneer\.py |
 build-backend = "scikit_build_core.build"
 dependencies-file = "../../dependencies.yaml"
 # TODO(jameslamb): can 'cuda_wheels=true' be removed?
-matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true"
+matrix-entry = "cuda_suffixed=true"
 
-# TODO(jameslamb): check build deps for cuml
 requires = [
     "cmake>=3.26.4,!=3.30.0",
     "cuda-python",
-    "cuvs==25.2.*,>=0.0.0a0",
     "cython>=3.0.0",
     "libcuml==25.2.*,>=0.0.0a0",
     "libraft==25.2.*,>=0.0.0a0",
diff --git a/python/libcuml/pyproject.toml b/python/libcuml/pyproject.toml
index e0df96779b..f922e9bf70 100644
--- a/python/libcuml/pyproject.toml
+++ b/python/libcuml/pyproject.toml
@@ -59,6 +59,9 @@ select = [
     "distro-too-large-compressed",
 ]
 
+# TODO(jameslamb): update ci/validate_whee.sh
+max_allowed_size_compressed = '5.0G'
+
 [tool.scikit-build]
 build-dir = "build/{wheel_tag}"
 cmake.build-type = "Release"

From 1d5326fc06545aafc532bb3f5840a0922b53f752 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 13 Jan 2025 11:52:23 -0800
Subject: [PATCH 28/48] fix validation working dir

---
 ci/build_wheel_cuml.sh    | 2 +-
 ci/build_wheel_libcuml.sh | 2 +-
 ci/validate_wheel.sh      | 5 ++++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index d51f8955d7..d0017d1b8a 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -45,6 +45,6 @@ python -m auditwheel repair \
     -w ${package_dir}/final_dist \
     ${package_dir}/dist/*
 
-./ci/validate_wheel.sh ${package_dir}/final_dist
+./ci/validate_wheel.sh ${package_dir} final_dist
 
 RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python "${package_dir}/final_dist"
diff --git a/ci/build_wheel_libcuml.sh b/ci/build_wheel_libcuml.sh
index f8f0a41b77..472d69a9ef 100755
--- a/ci/build_wheel_libcuml.sh
+++ b/ci/build_wheel_libcuml.sh
@@ -64,6 +64,6 @@ python -m auditwheel repair \
     -w ${package_dir}/final_dist \
     ${package_dir}/dist/*
 
-./ci/validate_wheel.sh ${package_dir}/final_dist
+./ci/validate_wheel.sh ${package_dir} final_dist
 
 RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist"
diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh
index 39beda002c..a8753f9bcb 100755
--- a/ci/validate_wheel.sh
+++ b/ci/validate_wheel.sh
@@ -3,7 +3,10 @@
 
 set -euo pipefail
 
-wheel_dir_relative_path=$1
+package_dir=$1
+wheel_dir_relative_path=$2
+
+cd "${package_dir}"
 
 rapids-logger "validate packages with 'pydistcheck'"
 

From 2ef32eaa006a84c0bd16220bb8e8af34198fbee8 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 13 Jan 2025 13:26:41 -0800
Subject: [PATCH 29/48] try to fix more builds

---
 .github/workflows/build.yaml            | 2 +-
 .github/workflows/pr.yaml               | 2 +-
 cpp/cmake/thirdparty/get_treelite.cmake | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 1d2a285a68..59cbe73dde 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -96,7 +96,7 @@ jobs:
       branch: ${{ inputs.branch }}
       sha: ${{ inputs.sha }}
       date: ${{ inputs.date }}
-      script: ci/build_wheel.sh
+      script: ci/build_wheel_cuml.sh
       # Note that this approach to cloning repos obviates any modification to
       # the CMake variables in get_cumlprims_mg.cmake since CMake will just use
       # the clone as is.
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 1526081d67..b66958cb13 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -188,7 +188,7 @@ jobs:
     uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
     with:
       build_type: pull-request
-      script: ci/build_wheel.sh
+      script: ci/build_wheel_cuml.sh
       extra-repo: rapidsai/cumlprims_mg
       extra-repo-sha: branch-25.02
       extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY
diff --git a/cpp/cmake/thirdparty/get_treelite.cmake b/cpp/cmake/thirdparty/get_treelite.cmake
index a14bacb531..e197b76a11 100644
--- a/cpp/cmake/thirdparty/get_treelite.cmake
+++ b/cpp/cmake/thirdparty/get_treelite.cmake
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -29,6 +29,7 @@ function(find_and_configure_treelite)
 
     rapids_cpm_find(Treelite ${PKG_VERSION}
         GLOBAL_TARGETS       ${TREELITE_LIBS}
+        BUILD_EXPORT_SET     cuml-exports
         INSTALL_EXPORT_SET   cuml-exports
         CPM_ARGS
             GIT_REPOSITORY   https://github.com/dmlc/treelite.git

From e35df327154297811fd069da0a1e12bf7122a0e8 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 13 Jan 2025 15:46:33 -0800
Subject: [PATCH 30/48] use dynamic treelite in conda builds

---
 ci/build_wheel_cuml.sh     |  2 +-
 python/cuml/CMakeLists.txt | 29 ++++++++++++++++++++++-------
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index d0017d1b8a..d0b0880c14 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -36,7 +36,7 @@ case "${RAPIDS_CUDA_VERSION}" in
     ;;
 esac
 
-export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS};-DSINGLEGPU=OFF"
+export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS};-DSINGLEGPU=OFF;-DUSE_LIBCUML_WHEEL=ON"
 ./ci/build_wheel.sh "${package_name}" "${package_dir}"
 
 mkdir -p ${package_dir}/final_dist
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 25f4be2286..cf12d4b64a 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -40,6 +40,7 @@ project(
 option(CUML_UNIVERSAL "Build all cuML Python components." ON)
 option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF)
 option(USE_CUVS_WHEEL "Use the cuVS wheel" OFF)
+option(USE_LIBCUML_WHEEL "Use libcuml wheel to provide some dependencies" OFF)
 
 # todo: use CMAKE_MESSAGE_CONTEXT for prefix for logging.
 # https://github.com/rapidsai/cuml/issues/4843
@@ -85,13 +86,14 @@ else()
   include(rapids-export)
   rapids_cpm_init()
 
-  # find CCCL, RMM, and RAFT before cuVS, to avoid
+  # --- CCCL, RAFT, RMM ---#
+  # find CCCL, RAFT, and RMM before cuVS, to avoid
   # cuVS CMake defining conflicting versions of targets like 'nvidia::cutlaass'
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_cccl.cmake)
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_rmm.cmake)
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_raft.cmake)
 
-  # --- cuvs --- #
+  # --- cuVS --- #
   # Once there are 'libcuvs' wheels, it should be possible to remove this CPM build of cuvs.
   #
   #   * conda builds will find 'libcuvs' in the build environment
@@ -112,15 +114,28 @@ else()
   # And because cuml Cython code needs to headers to satisfy calls like 'cdef extern from "treelite/c_api.h"'
   #
   # and it needs to come before find_package(cuml), because it's a PUBLIC
-  # dependency of cuml::cuml
-  set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
-  # TODO(jameslamb): is it safe for libcuml and cuml to both use their own static treelite?
-  set(CUML_USE_TREELITE_STATIC ON)
+  # dependency of cuml::cuml.
+  #
+  # TODO(jameslamb): clean up these comments
+
+  # wheel builds use a static treelite, because the 'libtreelite.so' in 'treelite' wheels
+  # isn't intended for dynamic linking by third-party projects (e.g. hides its symbols)
+  if(USE_LIBCUML_WHEEL)
+    # TODO(jameslamb): is it safe for libcuml++ (in 'libcuml' wheels) and Cython extensions in 'cuml' wheels
+    #                  to each use a separate static treelite?
+    set(CUML_PYTHON_TREELITE_TARGET treelite::treelite_static)
+    set(CUML_USE_TREELITE_STATIC ON)
+  else()
+    set(CUML_PYTHON_TREELITE_TARGET treelite::treelite)
+    set(CUML_USE_TREELITE_STATIC OFF)
+  endif()
+
   set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
+
   message(STATUS "--- [debug] finding treelite for cuML")
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
-  message(STATUS "--- [debug] done finding treelite for cuML")
 
+  # --- libcuml --- #
   message(STATUS "--- [debug] finding cuML")
   find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
   message(STATUS "--- [debug] found cuML")

From 78536817f83716c6c65b65b2f200e4cb31514e76 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 13 Jan 2025 19:10:48 -0800
Subject: [PATCH 31/48] refine build dependencies

---
 .gitignore                     |  1 -
 ci/build_wheel_cuml.sh         | 13 ++++++++++++-
 ci/build_wheel_libcuml.sh      |  3 ++-
 ci/validate_wheel.sh           | 19 +++++++++++++++++--
 dependencies.yaml              |  3 ---
 python/cuml/CMakeLists.txt     |  2 --
 python/cuml/pyproject.toml     | 11 ++---------
 python/libcuml/libcuml/load.py |  4 ----
 python/libcuml/pyproject.toml  |  5 -----
 9 files changed, 33 insertions(+), 28 deletions(-)

diff --git a/.gitignore b/.gitignore
index 210ec2eaca..e7f8328d0f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,7 +32,6 @@ tmp/
 .hypothesis
 wheels/
 wheelhouse/
-raft_log.txt
 _skbuild/
 
 ## files pickled in notebook when ran during python docstring generation
diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index d0b0880c14..e02d5e5da8 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -8,9 +8,21 @@ package_dir="python/cuml"
 
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
+# Download the libcuml wheel built in the previous step and make it
+# available for pip to find.
+LIBCUML_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libcuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcuml_dist)
+
 # TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
 source ./ci/use_wheels_from_prs.sh
 
+cat >> ./constraints.txt <<EOF
+libcuml-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${LIBCUML_WHEELHOUSE}/libcuml_*.whl)
+EOF
+
+# Using env variable PIP_CONSTRAINT is necessary to ensure the constraints
+# are used when creating the isolated build environment.
+export PIP_CONSTRAINT="${PWD}/constraints.txt"
+
 EXCLUDE_ARGS=(
   --exclude "libcuml++.so"
   --exclude "libcumlprims_mg.so"
@@ -20,7 +32,6 @@ EXCLUDE_ARGS=(
 case "${RAPIDS_CUDA_VERSION}" in
   12.*)
     EXCLUDE_ARGS+=(
-      --exclude "libcuvs.so"
       --exclude "libcublas.so.12"
       --exclude "libcublasLt.so.12"
       --exclude "libcufft.so.11"
diff --git a/ci/build_wheel_libcuml.sh b/ci/build_wheel_libcuml.sh
index 472d69a9ef..886958082a 100755
--- a/ci/build_wheel_libcuml.sh
+++ b/ci/build_wheel_libcuml.sh
@@ -28,7 +28,8 @@ python -m pip install \
 # 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735)
 export PIP_NO_BUILD_ISOLATION=0
 
-# TODO(jameslamb): it's weird to "exclude" libcumlprims_mg just as a way to stop auditwheel from complaining... when it really is in the wheel
+# NOTE: 'libcumlprims_mg.so' is marked as '--exclude' here because auditwheel doesn't detect it,
+#       but it really is intentionally included in 'libcuml' wheels
 EXCLUDE_ARGS=(
   --exclude "libcumlprims_mg.so"
   --exclude "libcuvs.so"
diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh
index a8753f9bcb..bcc67af0c7 100755
--- a/ci/validate_wheel.sh
+++ b/ci/validate_wheel.sh
@@ -6,14 +6,29 @@ set -euo pipefail
 package_dir=$1
 wheel_dir_relative_path=$2
 
+RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}"
+
+# some packages are much larger on CUDA 11 than on CUDA 12
+PYDISTCHECK_ARGS=()
+if [[ "${package_dir}" == "python/libcuml" ]]; then
+    if [[ "${RAPIDS_CUDA_MAJOR}" == "11" ]]; then
+        PYDISTCHECK_ARGS+=(
+            --max-allowed-size-compressed '1.0G'
+        )
+    else
+        PYDISTCHECK_ARGS+=(
+            --max-allowed-size-compressed '500M'
+        )
+    fi
+fi
+
 cd "${package_dir}"
 
 rapids-logger "validate packages with 'pydistcheck'"
 
-# TODO(jameslamb) add libcuml here
-
 pydistcheck \
     --inspect \
+    "${PYDISTCHECK_ARGS[@]}" \
     "$(echo ${wheel_dir_relative_path}/*.whl)"
 
 rapids-logger "validate packages with 'twine'"
diff --git a/dependencies.yaml b/dependencies.yaml
index 6d1f5975a7..ec6dd60139 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -124,7 +124,6 @@ files:
     extras:
       table: project
     includes:
-      - cuda_wheels
       - depends_on_cudf
       - depends_on_cupy
       - depends_on_cuvs
@@ -161,7 +160,6 @@ files:
       - common_build
       - depends_on_libraft
       - depends_on_librmm
-      - depends_on_treelite
       - py_build
   py_run_libcuml:
     output: pyproject
@@ -172,7 +170,6 @@ files:
       - cuda_wheels
       - depends_on_cuvs
       - depends_on_libraft
-      - depends_on_treelite
 channels:
   - rapidsai
   - rapidsai-nightly
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index cf12d4b64a..c5e988feff 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -19,9 +19,7 @@ include(../../rapids_config.cmake)
 option(CUML_CPU "Build only cuML CPU Python components." OFF)
 set(language_list "CXX")
 
-# TODO(jameslamb): can this be removed? (I think so?)
 if(NOT CUML_CPU)
-  # TODO(jameslamb): where does rapids_cuda_init_architectures belong?
   # We always need CUDA for cuML GPU because the raft dependency brings in a
   # header-only cuco dependency that enables CUDA unconditionally.
   include(rapids-cuda)
diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml
index 313a5d5d1b..3dd96f02de 100644
--- a/python/cuml/pyproject.toml
+++ b/python/cuml/pyproject.toml
@@ -20,13 +20,12 @@ requires = [
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [tool.pydistcheck]
-# TODO(jameslamb): update this once libcuml is working
 select = [
     "distro-too-large-compressed",
 ]
 
-# detect when package size grows significantly
-max_allowed_size_compressed = '1.5G'
+# PyPI limit is 100 MiB, fail CI before we get too close to that
+max_allowed_size_compressed = '75M'
 
 [tool.pytest.ini_options]
 addopts = "--tb=native"
@@ -102,11 +101,6 @@ dependencies = [
     "libcuml==25.2.*,>=0.0.0a0",
     "numba>=0.57",
     "numpy>=1.23,<3.0a0",
-    "nvidia-cublas",
-    "nvidia-cufft",
-    "nvidia-curand",
-    "nvidia-cusolver",
-    "nvidia-cusparse",
     "packaging",
     "pylibraft==25.2.*,>=0.0.0a0",
     "raft-dask==25.2.*,>=0.0.0a0",
@@ -176,7 +170,6 @@ versioneer\.py |
 [tool.rapids-build-backend]
 build-backend = "scikit_build_core.build"
 dependencies-file = "../../dependencies.yaml"
-# TODO(jameslamb): can 'cuda_wheels=true' be removed?
 matrix-entry = "cuda_suffixed=true"
 
 requires = [
diff --git a/python/libcuml/libcuml/load.py b/python/libcuml/libcuml/load.py
index 0b51ec00f7..1d973d8309 100644
--- a/python/libcuml/libcuml/load.py
+++ b/python/libcuml/libcuml/load.py
@@ -67,10 +67,6 @@ def load_library():
         # the loader can find it.
         pass
 
-    # treelite must be loaded before libcuml++ because libcuml++
-    # references its symbols
-    import treelite
-
     prefer_system_installation = (
         os.getenv("RAPIDS_LIBCUML_PREFER_SYSTEM_LIBRARY", "false").lower()
         != "false"
diff --git a/python/libcuml/pyproject.toml b/python/libcuml/pyproject.toml
index f922e9bf70..4f6fc0d349 100644
--- a/python/libcuml/pyproject.toml
+++ b/python/libcuml/pyproject.toml
@@ -44,7 +44,6 @@ dependencies = [
     "nvidia-curand",
     "nvidia-cusolver",
     "nvidia-cusparse",
-    "treelite==4.3.0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [project.urls]
@@ -59,9 +58,6 @@ select = [
     "distro-too-large-compressed",
 ]
 
-# TODO(jameslamb): update ci/validate_whee.sh
-max_allowed_size_compressed = '5.0G'
-
 [tool.scikit-build]
 build-dir = "build/{wheel_tag}"
 cmake.build-type = "Release"
@@ -89,5 +85,4 @@ requires = [
     "libraft==25.2.*,>=0.0.0a0",
     "librmm==25.2.*,>=0.0.0a0",
     "ninja",
-    "treelite==4.3.0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.

From ba3387f69e8e33988db5786d5bc15445cc85f9f1 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 13 Jan 2025 19:33:17 -0800
Subject: [PATCH 32/48] more dependency fixes

---
 conda/environments/all_cuda-118_arch-x86_64.yaml |  1 -
 conda/environments/all_cuda-125_arch-x86_64.yaml |  1 -
 dependencies.yaml                                | 14 +++-----------
 python/cuml/pyproject.toml                       |  1 +
 python/libcuml/pyproject.toml                    |  1 -
 5 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 05cfd3d7ae..8eebc69c64 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -42,7 +42,6 @@ dependencies:
 - libcusparse-dev=11.7.5.86
 - libcusparse=11.7.5.86
 - libcuvs==25.2.*,>=0.0.0a0
-- libraft-headers==25.2.*,>=0.0.0a0
 - libraft==25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - nbsphinx
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index a2daa61350..cb0736700d 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -39,7 +39,6 @@ dependencies:
 - libcusolver-dev
 - libcusparse-dev
 - libcuvs==25.2.*,>=0.0.0a0
-- libraft-headers==25.2.*,>=0.0.0a0
 - libraft==25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - nbsphinx
diff --git a/dependencies.yaml b/dependencies.yaml
index cea56904d4..8f5061dc76 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -17,12 +17,10 @@ files:
       - depends_on_libcumlprims
       - depends_on_libcuvs
       - depends_on_libraft
-      - depends_on_libraft_headers
       - depends_on_librmm
       - depends_on_pylibraft
       - depends_on_raft_dask
       - depends_on_rmm
-      - depends_on_treelite
       - docs
       - py_build_cuml
       - py_run_cuml
@@ -111,13 +109,13 @@ files:
       key: requires
     includes:
       - common_build
+      - depends_on_cuda_python
       - depends_on_libcuml
       - depends_on_libcumlprims
       - depends_on_libraft
       - depends_on_librmm
       - depends_on_pylibraft
       - depends_on_rmm
-      - depends_on_treelite
       - py_build_cuml
   py_run_cuml:
     output: pyproject
@@ -134,7 +132,6 @@ files:
       - depends_on_pylibraft
       - depends_on_raft_dask
       - depends_on_rmm
-      - depends_on_treelite
       - py_run_cuml
   py_test_cuml:
     output: pyproject
@@ -158,11 +155,9 @@ files:
       table: tool.rapids-build-backend
       key: requires
     includes:
-      # TODO(jameslamb): split out cuda-python and cython
       - common_build
       - depends_on_libraft
       - depends_on_librmm
-      - py_build_cuml
   py_run_libcuml:
     output: pyproject
     pyproject_dir: python/libcuml
@@ -250,6 +245,7 @@ dependencies:
       - output_types: [conda, requirements, pyproject]
         packages:
           - &cython cython>=3.0.0
+          - &treelite treelite==4.3.0
 
   py_run_cuml:
     common:
@@ -265,6 +261,7 @@ dependencies:
           - scipy>=1.8.0
           - packaging
           - rapids-dask-dependency==25.2.*,>=0.0.0a0
+          - *treelite
       - output_types: requirements
         packages:
           # pip recognizes the index as a global option for the requirements.txt file
@@ -766,8 +763,3 @@ dependencies:
           - matrix:
             packages:
               - *rmm_unsuffixed
-  depends_on_treelite:
-    common:
-      - output_types: [conda, requirements, pyproject]
-        packages:
-          - treelite==4.3.0
diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml
index f0e54f6695..cf4bb36461 100644
--- a/python/cuml/pyproject.toml
+++ b/python/cuml/pyproject.toml
@@ -175,6 +175,7 @@ matrix-entry = "cuda_suffixed=true"
 
 requires = [
     "cmake>=3.26.4,!=3.30.0",
+    "cuda-python",
     "cython>=3.0.0",
     "libcuml==25.2.*,>=0.0.0a0",
     "libraft==25.2.*,>=0.0.0a0",
diff --git a/python/libcuml/pyproject.toml b/python/libcuml/pyproject.toml
index d8acefce3c..0caace57f4 100644
--- a/python/libcuml/pyproject.toml
+++ b/python/libcuml/pyproject.toml
@@ -80,7 +80,6 @@ dependencies-file = "../../dependencies.yaml"
 matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true"
 requires = [
     "cmake>=3.26.4,!=3.30.0",
-    "cython>=3.0.0",
     "libraft==25.2.*,>=0.0.0a0",
     "librmm==25.2.*,>=0.0.0a0",
     "ninja",

From 1f9ac8979d203ba8bd826961046cda5a7520a186 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 13 Jan 2025 20:35:04 -0800
Subject: [PATCH 33/48] fix scripts

---
 ci/build_wheel_cuml.sh         | 2 +-
 python/libcuml/libcuml/load.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index e02d5e5da8..51ffc62daa 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -16,7 +16,7 @@ LIBCUML_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libcuml_${RAPIDS_PY_CUDA_SUFFIX}" rap
 source ./ci/use_wheels_from_prs.sh
 
 cat >> ./constraints.txt <<EOF
-libcuml-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${LIBCUGRAPH_WHEELHOUSE}/libcuml_*.whl)
+libcuml-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${LIBCUML_WHEELHOUSE}/libcuml_*.whl)
 EOF
 
 # Using env variable PIP_CONSTRAINT is necessary to ensure the constraints
diff --git a/python/libcuml/libcuml/load.py b/python/libcuml/libcuml/load.py
index 1d973d8309..9e85ba2e94 100644
--- a/python/libcuml/libcuml/load.py
+++ b/python/libcuml/libcuml/load.py
@@ -101,6 +101,6 @@ def load_library():
 
     # The caller almost never needs to do anything with these libraries, but no
     # harm in offering the option since these objects at least provide handles
-    # to inspect where libcugraph was loaded from.
+    # to inspect where libcuml was loaded from.
 
     return libs_to_return

From 82e3f162870efca1f3ce6bf08c78e363f5ad3e4c Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 13 Jan 2025 21:45:19 -0800
Subject: [PATCH 34/48] update RAFT commit, build cuml without build isolation

---
 ci/build_wheel_cuml.sh            | 32 ++++++++++++++++++++++++-------
 ci/use_conda_packages_from_prs.sh |  2 +-
 ci/use_wheels_from_prs.sh         |  2 +-
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index 51ffc62daa..9c27d795bf 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -6,8 +6,6 @@ set -euo pipefail
 package_name="cuml"
 package_dir="python/cuml"
 
-RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-
 # Download the libcuml wheel built in the previous step and make it
 # available for pip to find.
 LIBCUML_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libcuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcuml_dist)
@@ -23,6 +21,30 @@ EOF
 # are used when creating the isolated build environment.
 export PIP_CONSTRAINT="${PWD}/constraints.txt"
 
+rapids-logger "Generating build requirements"
+
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ./ci/use_wheels_from_prs.sh
+
+rapids-dependency-file-generator \
+  --output requirements \
+  --file-key "py_build_${package_name}" \
+  --file-key "py_rapids_build_${package_name}" \
+  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};cuda_suffixed=true" \
+| tee /tmp/requirements-build.txt
+
+rapids-logger "Installing build requirements"
+python -m pip install \
+    -v \
+    --prefer-binary \
+    -r /tmp/requirements-build.txt
+
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
+
+# build with '--no-build-isolation', for better sccache hit rate
+# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735)
+export PIP_NO_BUILD_ISOLATION=0
+
 EXCLUDE_ARGS=(
   --exclude "libcuml++.so"
   --exclude "libcumlprims_mg.so"
@@ -40,14 +62,10 @@ case "${RAPIDS_CUDA_VERSION}" in
       --exclude "libcusparse.so.12"
       --exclude "libnvJitLink.so.12"
     )
-    EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON"
-    ;;
-  11.*)
-    EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF"
     ;;
 esac
 
-export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS};-DSINGLEGPU=OFF;-DUSE_LIBCUML_WHEEL=ON"
+export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON;-DSINGLEGPU=OFF;-DUSE_LIBCUML_WHEEL=ON"
 ./ci/build_wheel.sh "${package_name}" "${package_dir}"
 
 mkdir -p ${package_dir}/final_dist
diff --git a/ci/use_conda_packages_from_prs.sh b/ci/use_conda_packages_from_prs.sh
index ac17020687..c24226a550 100644
--- a/ci/use_conda_packages_from_prs.sh
+++ b/ci/use_conda_packages_from_prs.sh
@@ -1,7 +1,7 @@
 # Copyright (c) 2025, NVIDIA CORPORATION.
 # TODO(jameslamb): remove this file when https://github.com/rapidsai/raft/pull/2531 is merged
 
-RAFT_COMMIT="d275c995fb51310d1340fe2fd6d63d0bfd43cafa"
+RAFT_COMMIT="4b793be27b27d40119706ea5df26cc03c8efe33c"
 
 RAFT_CPP_CHANNEL=$(rapids-get-pr-conda-artifact raft 2531 cpp "${RAFT_COMMIT:0:7}")
 RAFT_PYTHON_CHANNEL=$(rapids-get-pr-conda-artifact raft 2531 python "${RAFT_COMMIT:0:7}")
diff --git a/ci/use_wheels_from_prs.sh b/ci/use_wheels_from_prs.sh
index ce5b5eb8a2..904d740003 100644
--- a/ci/use_wheels_from_prs.sh
+++ b/ci/use_wheels_from_prs.sh
@@ -3,7 +3,7 @@
 
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
-RAFT_COMMIT="d275c995fb51310d1340fe2fd6d63d0bfd43cafa"
+RAFT_COMMIT="4b793be27b27d40119706ea5df26cc03c8efe33c"
 LIBRAFT_CHANNEL=$(
   RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact raft 2531 cpp "${RAFT_COMMIT:0:7}"
 )

From 53c8b35513bfcf22c1e7952f7286a0d336c27510 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Tue, 14 Jan 2025 06:28:01 -0800
Subject: [PATCH 35/48] try fixing math wheels linkage for CUDA 11

---
 ci/build_wheel_cuml.sh     |  6 +++++-
 python/cuml/CMakeLists.txt | 22 +++++++++++++++++++++-
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index 9c27d795bf..bed487cc13 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -62,10 +62,14 @@ case "${RAPIDS_CUDA_VERSION}" in
       --exclude "libcusparse.so.12"
       --exclude "libnvJitLink.so.12"
     )
+    EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON"
+    ;;
+  11.*)
+    EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF"
     ;;
 esac
 
-export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON;-DSINGLEGPU=OFF;-DUSE_LIBCUML_WHEEL=ON"
+export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS};-DSINGLEGPU=OFF;-DUSE_LIBCUML_WHEEL=ON"
 ./ci/build_wheel.sh "${package_name}" "${package_dir}"
 
 mkdir -p ${package_dir}/final_dist
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index c5e988feff..e2db8105ce 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -37,12 +37,14 @@ project(
 # - User Options  --------------------------------------------------------------
 option(CUML_UNIVERSAL "Build all cuML Python components." ON)
 option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF)
+option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF)
 option(USE_CUVS_WHEEL "Use the cuVS wheel" OFF)
 option(USE_LIBCUML_WHEEL "Use libcuml wheel to provide some dependencies" OFF)
 
 # todo: use CMAKE_MESSAGE_CONTEXT for prefix for logging.
 # https://github.com/rapidsai/cuml/issues/4843
 message(VERBOSE "CUML_PY: Build only cuML CPU Python components.: ${CUML_CPU}")
+message(VERBOSE "CUML_PY: Use CUDA math wheels instead of system libraries: ${USE_CUDA_MATH_WHEELS}")
 message(VERBOSE "CUML_PY: Disabling all mnmg components and comms libraries: ${SINGLEGPU}")
 
 set(CUML_ALGORITHMS "ALL" CACHE STRING "Choose which algorithms are built cuML. Can specify individual algorithms or groups in a semicolon-separated list.")
@@ -84,9 +86,27 @@ else()
   include(rapids-export)
   rapids_cpm_init()
 
+  # --- CUDA --- #
+  set(CUDA_STATIC_RUNTIME ON)
+
+  # Link to the CUDA wheels with shared libraries for CUDA 12+
+  #
+  # This is here because we're rebuilding cuVS below...without it, cuVS on CUDA 11
+  # dynamically links to CUDA math libs (cuBLAS, cuFFT, etc.), and then
+  # that linkage results in those libraries being vendored in wheels by auditwheel.
+  find_package(CUDAToolkit REQUIRED)
+  if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0)
+    set(CUDA_STATIC_MATH_LIBRARIES OFF)
+  else()
+    if(USE_CUDA_MATH_WHEELS)
+      message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0")
+    endif()
+    set(CUDA_STATIC_MATH_LIBRARIES ON)
+  endif()
+
   # --- CCCL, RAFT, RMM ---#
   # find CCCL, RAFT, and RMM before cuVS, to avoid
-  # cuVS CMake defining conflicting versions of targets like 'nvidia::cutlaass'
+  # cuVS CMake defining conflicting versions of targets like 'nvidia::cutlass::cutlass'
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_cccl.cmake)
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_rmm.cmake)
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_raft.cmake)

From 002c0f3b946ce0346cf11473b2943fbb3fb59cbe Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Tue, 14 Jan 2025 06:29:37 -0800
Subject: [PATCH 36/48] add use_cuda_wheels

---
 dependencies.yaml          | 1 +
 python/cuml/pyproject.toml | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/dependencies.yaml b/dependencies.yaml
index 8f5061dc76..dca6bd0164 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -123,6 +123,7 @@ files:
     extras:
       table: project
     includes:
+      - cuda_wheels
       - depends_on_cuda_python
       - depends_on_cudf
       - depends_on_cupy
diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml
index cf4bb36461..5a96d98874 100644
--- a/python/cuml/pyproject.toml
+++ b/python/cuml/pyproject.toml
@@ -102,6 +102,11 @@ dependencies = [
     "libcuml==25.2.*,>=0.0.0a0",
     "numba>=0.57",
     "numpy>=1.23,<3.0a0",
+    "nvidia-cublas",
+    "nvidia-cufft",
+    "nvidia-curand",
+    "nvidia-cusolver",
+    "nvidia-cusparse",
     "packaging",
     "pylibraft==25.2.*,>=0.0.0a0",
     "raft-dask==25.2.*,>=0.0.0a0",
@@ -171,7 +176,7 @@ versioneer\.py |
 [tool.rapids-build-backend]
 build-backend = "scikit_build_core.build"
 dependencies-file = "../../dependencies.yaml"
-matrix-entry = "cuda_suffixed=true"
+matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true"
 
 requires = [
     "cmake>=3.26.4,!=3.30.0",

From 35c88acd7c9d881960ab44a35059438c4da0813b Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Tue, 14 Jan 2025 07:53:20 -0800
Subject: [PATCH 37/48] fix build_wheel_cuml.sh again

---
 ci/build_wheel_cuml.sh    | 4 ++--
 ci/build_wheel_libcuml.sh | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index bed487cc13..b411c38919 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -6,6 +6,8 @@ set -euo pipefail
 package_name="cuml"
 package_dir="python/cuml"
 
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
+
 # Download the libcuml wheel built in the previous step and make it
 # available for pip to find.
 LIBCUML_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libcuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcuml_dist)
@@ -39,8 +41,6 @@ python -m pip install \
     --prefer-binary \
     -r /tmp/requirements-build.txt
 
-RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-
 # build with '--no-build-isolation', for better sccache hit rate
 # 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735)
 export PIP_NO_BUILD_ISOLATION=0
diff --git a/ci/build_wheel_libcuml.sh b/ci/build_wheel_libcuml.sh
index 886958082a..b6d7bea275 100755
--- a/ci/build_wheel_libcuml.sh
+++ b/ci/build_wheel_libcuml.sh
@@ -6,6 +6,8 @@ set -euo pipefail
 package_name="libcuml"
 package_dir="python/libcuml"
 
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
+
 rapids-logger "Generating build requirements"
 
 # TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
@@ -57,8 +59,6 @@ esac
 export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS};-DSINGLEGPU=OFF"
 ./ci/build_wheel.sh "${package_name}" "${package_dir}"
 
-RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-
 mkdir -p ${package_dir}/final_dist
 python -m auditwheel repair \
     "${EXCLUDE_ARGS[@]}" \

From 0e620181721edb436ccaf8e0ae0d80e45a6de402 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Tue, 14 Jan 2025 08:43:55 -0800
Subject: [PATCH 38/48] try to fix build_wheel_cuml.sh again

---
 ci/build_wheel_cuml.sh | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index b411c38919..58d567e5f6 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -10,19 +10,11 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
 # Download the libcuml wheel built in the previous step and make it
 # available for pip to find.
-LIBCUML_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libcuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcuml_dist)
+RAPIDS_PY_WHEEL_NAME="libcuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcuml_dist
 
 # TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
 source ./ci/use_wheels_from_prs.sh
 
-cat >> ./constraints.txt <<EOF
-libcuml-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${LIBCUML_WHEELHOUSE}/libcuml_*.whl)
-EOF
-
-# Using env variable PIP_CONSTRAINT is necessary to ensure the constraints
-# are used when creating the isolated build environment.
-export PIP_CONSTRAINT="${PWD}/constraints.txt"
-
 rapids-logger "Generating build requirements"
 
 # TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
@@ -39,7 +31,8 @@ rapids-logger "Installing build requirements"
 python -m pip install \
     -v \
     --prefer-binary \
-    -r /tmp/requirements-build.txt
+    -r /tmp/requirements-build.txt \
+    /tmp/libcuml_dist/libcuml_*.whl
 
 # build with '--no-build-isolation', for better sccache hit rate
 # 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735)

From f5b65905a562ec3c7cd3959efb2fc0555e470d12 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Thu, 16 Jan 2025 11:56:16 -0800
Subject: [PATCH 39/48] misc. changes

---
 .github/workflows/build.yaml      | 1 +
 ci/build_wheel_cuml.sh            | 2 +-
 ci/build_wheel_libcuml.sh         | 2 +-
 ci/use_conda_packages_from_prs.sh | 2 +-
 ci/use_wheels_from_prs.sh         | 2 +-
 python/cuml/CMakeLists.txt        | 3 +--
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 59cbe73dde..ad2edc7b8b 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -89,6 +89,7 @@ jobs:
       package-name: libcuml
       package-type: cpp
   wheel-build-cuml:
+    needs: wheel-build-libcuml
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
     with:
diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index 58d567e5f6..bd5d9da86f 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -62,7 +62,7 @@ case "${RAPIDS_CUDA_VERSION}" in
     ;;
 esac
 
-export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS};-DSINGLEGPU=OFF;-DUSE_LIBCUML_WHEEL=ON"
+export SKBUILD_CMAKE_ARGS="-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS};-DSINGLEGPU=OFF;-DUSE_LIBCUML_WHEEL=ON"
 ./ci/build_wheel.sh "${package_name}" "${package_dir}"
 
 mkdir -p ${package_dir}/final_dist
diff --git a/ci/build_wheel_libcuml.sh b/ci/build_wheel_libcuml.sh
index b6d7bea275..a795ccc987 100755
--- a/ci/build_wheel_libcuml.sh
+++ b/ci/build_wheel_libcuml.sh
@@ -56,7 +56,7 @@ case "${RAPIDS_CUDA_VERSION}" in
     ;;
 esac
 
-export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS};-DSINGLEGPU=OFF"
+export SKBUILD_CMAKE_ARGS="-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS};-DSINGLEGPU=OFF"
 ./ci/build_wheel.sh "${package_name}" "${package_dir}"
 
 mkdir -p ${package_dir}/final_dist
diff --git a/ci/use_conda_packages_from_prs.sh b/ci/use_conda_packages_from_prs.sh
index c24226a550..3b1cba3bfb 100644
--- a/ci/use_conda_packages_from_prs.sh
+++ b/ci/use_conda_packages_from_prs.sh
@@ -1,7 +1,7 @@
 # Copyright (c) 2025, NVIDIA CORPORATION.
 # TODO(jameslamb): remove this file when https://github.com/rapidsai/raft/pull/2531 is merged
 
-RAFT_COMMIT="4b793be27b27d40119706ea5df26cc03c8efe33c"
+RAFT_COMMIT="0d6597b08919f2aae8ac268f1a68d6a8fe5beb4e"
 
 RAFT_CPP_CHANNEL=$(rapids-get-pr-conda-artifact raft 2531 cpp "${RAFT_COMMIT:0:7}")
 RAFT_PYTHON_CHANNEL=$(rapids-get-pr-conda-artifact raft 2531 python "${RAFT_COMMIT:0:7}")
diff --git a/ci/use_wheels_from_prs.sh b/ci/use_wheels_from_prs.sh
index 904d740003..aea295d4bd 100644
--- a/ci/use_wheels_from_prs.sh
+++ b/ci/use_wheels_from_prs.sh
@@ -3,7 +3,7 @@
 
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
-RAFT_COMMIT="4b793be27b27d40119706ea5df26cc03c8efe33c"
+RAFT_COMMIT="0d6597b08919f2aae8ac268f1a68d6a8fe5beb4e"
 LIBRAFT_CHANNEL=$(
   RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact raft 2531 cpp "${RAFT_COMMIT:0:7}"
 )
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index e2db8105ce..b8aeb4068f 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -107,8 +107,7 @@ else()
   # --- CCCL, RAFT, RMM ---#
   # find CCCL, RAFT, and RMM before cuVS, to avoid
   # cuVS CMake defining conflicting versions of targets like 'nvidia::cutlass::cutlass'
-  include(${CUML_CPP_SRC}/cmake/thirdparty/get_cccl.cmake)
-  include(${CUML_CPP_SRC}/cmake/thirdparty/get_rmm.cmake)
+  # include(${CUML_CPP_SRC}/cmake/thirdparty/get_cccl.cmake)
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_raft.cmake)
 
   # --- cuVS --- #

From c79617b33f61add2722438e64a37e15afaaaa1bb Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Thu, 16 Jan 2025 12:13:29 -0800
Subject: [PATCH 40/48] rapids_cuda_init_architectures()

---
 python/cuml/CMakeLists.txt    | 4 ++++
 python/libcuml/CMakeLists.txt | 5 ++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index b8aeb4068f..95502c9d36 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -160,6 +160,10 @@ else()
   set(cuml_sg_libraries cuml::${CUML_CPP_TARGET})
   set(cuml_mg_libraries cuml::${CUML_CPP_TARGET})
 
+  if(NOT SINGLEGPU)
+    list(APPEND cuml_mg_libraries cumlprims_mg::cumlprims_mg)
+  endif()
+
   list(APPEND CYTHON_FLAGS
   "--compile-time-env GPUBUILD=1")
 endif()
diff --git a/python/libcuml/CMakeLists.txt b/python/libcuml/CMakeLists.txt
index 8509ecf682..a7f6ea29e6 100644
--- a/python/libcuml/CMakeLists.txt
+++ b/python/libcuml/CMakeLists.txt
@@ -16,10 +16,13 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
 
 include(../../rapids_config.cmake)
 
+include(rapids-cuda)
+rapids_cuda_init_architectures(libcugraph-python)
+
 project(
   libcuml-python
   VERSION "${RAPIDS_VERSION}"
-  LANGUAGES CXX
+  LANGUAGES CXX CUDA
 )
 
 ################################################################################

From 57db9f7a8a645c7aa661f27c0962632c5e00a958 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 17 Jan 2025 09:54:13 -0600
Subject: [PATCH 41/48] Revert libraft testing changes.

---
 ci/build_cpp.sh                   |  5 +----
 ci/build_docs.sh                  |  5 +----
 ci/build_python.sh                |  5 +----
 ci/build_wheel_cuml.sh            |  6 ------
 ci/build_wheel_libcuml.sh         |  3 ---
 ci/test_cpp.sh                    |  5 +----
 ci/test_notebooks.sh              |  5 +----
 ci/test_python_common.sh          |  5 +----
 ci/use_conda_packages_from_prs.sh | 10 ----------
 ci/use_wheels_from_prs.sh         | 22 ----------------------
 10 files changed, 6 insertions(+), 65 deletions(-)
 delete mode 100644 ci/use_conda_packages_from_prs.sh
 delete mode 100644 ci/use_wheels_from_prs.sh

diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index 7c35f35be5..fa066c03c6 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 
 set -euo pipefail
 
@@ -9,9 +9,6 @@ source rapids-configure-sccache
 
 source rapids-date-string
 
-# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
-source ci/use_conda_packages_from_prs.sh
-
 export CMAKE_GENERATOR=Ninja
 
 rapids-print-env
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index a1cc16eb10..05f1f24ee5 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2023-2025, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 set -euo pipefail
 
 rapids-logger "Create test conda environment"
@@ -8,9 +8,6 @@ rapids-logger "Create test conda environment"
 RAPIDS_VERSION="$(rapids-version)"
 export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
 
-# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
-source ci/use_conda_packages_from_prs.sh
-
 rapids-dependency-file-generator \
   --output conda \
   --file-key docs \
diff --git a/ci/build_python.sh b/ci/build_python.sh
index d0b5724f89..e8d2b3ed39 100755
--- a/ci/build_python.sh
+++ b/ci/build_python.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 
 set -euo pipefail
 
@@ -9,9 +9,6 @@ source rapids-configure-sccache
 
 source rapids-date-string
 
-# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
-source ci/use_conda_packages_from_prs.sh
-
 export CMAKE_GENERATOR=Ninja
 
 rapids-print-env
diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index bd5d9da86f..4b8525ce78 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -12,14 +12,8 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 # available for pip to find.
 RAPIDS_PY_WHEEL_NAME="libcuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcuml_dist
 
-# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
-source ./ci/use_wheels_from_prs.sh
-
 rapids-logger "Generating build requirements"
 
-# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
-source ./ci/use_wheels_from_prs.sh
-
 rapids-dependency-file-generator \
   --output requirements \
   --file-key "py_build_${package_name}" \
diff --git a/ci/build_wheel_libcuml.sh b/ci/build_wheel_libcuml.sh
index a795ccc987..b2a32d47ab 100755
--- a/ci/build_wheel_libcuml.sh
+++ b/ci/build_wheel_libcuml.sh
@@ -10,9 +10,6 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
 rapids-logger "Generating build requirements"
 
-# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
-source ./ci/use_wheels_from_prs.sh
-
 rapids-dependency-file-generator \
   --output requirements \
   --file-key "py_build_${package_name}" \
diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh
index 9535d43ae7..ea6d1cdc11 100755
--- a/ci/test_cpp.sh
+++ b/ci/test_cpp.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 
 set -euo pipefail
 
@@ -11,9 +11,6 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../
 rapids-logger "Downloading artifacts from previous jobs"
 CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
 
-# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
-source ci/use_conda_packages_from_prs.sh
-
 rapids-logger "Generate C++ testing dependencies"
 rapids-dependency-file-generator \
   --output conda \
diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh
index c3d8ec7453..d76a754b8b 100755
--- a/ci/test_notebooks.sh
+++ b/ci/test_notebooks.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2020-2025, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 set -euo pipefail
 
 . /opt/conda/etc/profile.d/conda.sh
@@ -8,9 +8,6 @@ rapids-logger "Downloading artifacts from previous jobs"
 CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
 PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
 
-# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
-source ci/use_conda_packages_from_prs.sh
-
 rapids-logger "Generate Notebook testing dependencies"
 rapids-dependency-file-generator \
   --output conda \
diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh
index 0430c553c3..5f1894356c 100644
--- a/ci/test_python_common.sh
+++ b/ci/test_python_common.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 
 set -euo pipefail
 
@@ -9,9 +9,6 @@ rapids-logger "Downloading artifacts from previous jobs"
 CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
 PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
 
-# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
-source ci/use_conda_packages_from_prs.sh
-
 rapids-logger "Generate Python testing dependencies"
 rapids-dependency-file-generator \
   --output conda \
diff --git a/ci/use_conda_packages_from_prs.sh b/ci/use_conda_packages_from_prs.sh
deleted file mode 100644
index 3b1cba3bfb..0000000000
--- a/ci/use_conda_packages_from_prs.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-# Copyright (c) 2025, NVIDIA CORPORATION.
-# TODO(jameslamb): remove this file when https://github.com/rapidsai/raft/pull/2531 is merged
-
-RAFT_COMMIT="0d6597b08919f2aae8ac268f1a68d6a8fe5beb4e"
-
-RAFT_CPP_CHANNEL=$(rapids-get-pr-conda-artifact raft 2531 cpp "${RAFT_COMMIT:0:7}")
-RAFT_PYTHON_CHANNEL=$(rapids-get-pr-conda-artifact raft 2531 python "${RAFT_COMMIT:0:7}")
-
-conda config --system --add channels "${RAFT_CPP_CHANNEL}"
-conda config --system --add channels "${RAFT_PYTHON_CHANNEL}"
diff --git a/ci/use_wheels_from_prs.sh b/ci/use_wheels_from_prs.sh
deleted file mode 100644
index aea295d4bd..0000000000
--- a/ci/use_wheels_from_prs.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-# Copyright (c) 2025, NVIDIA CORPORATION.
-# TODO(jameslamb): remove this file when https://github.com/rapidsai/raft/pull/2531 is merged
-
-RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-
-RAFT_COMMIT="0d6597b08919f2aae8ac268f1a68d6a8fe5beb4e"
-LIBRAFT_CHANNEL=$(
-  RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact raft 2531 cpp "${RAFT_COMMIT:0:7}"
-)
-PYLIBRAFT_CHANNEL=$(
-  RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact raft 2531 python "${RAFT_COMMIT:0:7}"
-)
-RAFT_DASK_CHANNEL=$(
-  RAPIDS_PY_WHEEL_NAME="raft_dask_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact raft 2531 python "${RAFT_COMMIT:0:7}"
-)
-cat > ./constraints.txt <<EOF
-libraft-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${LIBRAFT_CHANNEL}/libraft_*.whl)
-pylibraft-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${PYLIBRAFT_CHANNEL}/pylibraft_*.whl)
-raft-dask-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${RAFT_DASK_CHANNEL}/raft_dask_*.whl)
-EOF
-
-export PIP_CONSTRAINT=$(pwd)/constraints.txt

From e7109c4afd33436eb8ddb2ec1d9b33c503498a90 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Fri, 17 Jan 2025 09:24:25 -0800
Subject: [PATCH 42/48] simplify CMake

---
 .github/workflows/pr.yaml     | 26 +++++++++++++-------------
 ci/build_wheel_cuml.sh        |  6 +++---
 ci/build_wheel_libcuml.sh     | 31 +++++++++++++++++++------------
 python/libcuml/CMakeLists.txt | 32 +++++++++++++-------------------
 4 files changed, 48 insertions(+), 47 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index b66958cb13..e97c7e3f48 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -28,7 +28,7 @@ jobs:
       - wheel-build-libcuml
       - wheel-build-cuml
       - wheel-tests-cuml
-      # - devcontainer
+      - devcontainer
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02
     if: always()
@@ -200,18 +200,18 @@ jobs:
     with:
       build_type: pull-request
       script: ci/test_wheel.sh
-  # devcontainer:
-  #   needs: telemetry-setup
-  #   secrets: inherit
-  #   uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02
-  #   with:
-  #     arch: '["amd64"]'
-  #     cuda: '["12.5"]'
-  #     extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY
-  #     build_command: |
-  #       sccache -z;
-  #       build-all --verbose;
-  #       sccache -s;
+  devcontainer:
+    needs: telemetry-setup
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02
+    with:
+      arch: '["amd64"]'
+      cuda: '["12.5"]'
+      extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY
+      build_command: |
+        sccache -z;
+        build-all --verbose;
+        sccache -s;
 
   telemetry-summarize:
     # This job must use a self-hosted runner to record telemetry traces.
diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index bd5d9da86f..3ff307af6e 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -55,14 +55,14 @@ case "${RAPIDS_CUDA_VERSION}" in
       --exclude "libcusparse.so.12"
       --exclude "libnvJitLink.so.12"
     )
-    EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON"
+    EXTRA_CMAKE_ARGS="-DUSE_CUDA_MATH_WHEELS=ON"
     ;;
   11.*)
-    EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF"
+    EXTRA_CMAKE_ARGS="-DUSE_CUDA_MATH_WHEELS=OFF"
     ;;
 esac
 
-export SKBUILD_CMAKE_ARGS="-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS};-DSINGLEGPU=OFF;-DUSE_LIBCUML_WHEEL=ON"
+export SKBUILD_CMAKE_ARGS="-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;${EXTRA_CMAKE_ARGS};-DSINGLEGPU=OFF;-DUSE_LIBCUML_WHEEL=ON"
 ./ci/build_wheel.sh "${package_name}" "${package_dir}"
 
 mkdir -p ${package_dir}/final_dist
diff --git a/ci/build_wheel_libcuml.sh b/ci/build_wheel_libcuml.sh
index a795ccc987..c7ca473b95 100755
--- a/ci/build_wheel_libcuml.sh
+++ b/ci/build_wheel_libcuml.sh
@@ -38,25 +38,32 @@ EXCLUDE_ARGS=(
   --exclude "libraft.so"
 )
 
+# Avoid ever vendoring CUDA libraries into wheels.
+#
+# On CUDA 11 builds these excludes should technically be unnecessary because
+# there cuml and its dependencies statically link against these libraries, but
+# this is here unconditionally to ensure those wheels don't accidentally pick up
+# these libraries transitively.
+EXCLUDE_ARGS+=(
+  --exclude "libcublas.so.12"
+  --exclude "libcublasLt.so.12"
+  --exclude "libcufft.so.11"
+  --exclude "libcurand.so.10"
+  --exclude "libcusolver.so.11"
+  --exclude "libcusparse.so.12"
+  --exclude "libnvJitLink.so.12"
+)
+
 case "${RAPIDS_CUDA_VERSION}" in
   12.*)
-    EXCLUDE_ARGS+=(
-      --exclude "libcublas.so.12"
-      --exclude "libcublasLt.so.12"
-      --exclude "libcufft.so.11"
-      --exclude "libcurand.so.10"
-      --exclude "libcusolver.so.11"
-      --exclude "libcusparse.so.12"
-      --exclude "libnvJitLink.so.12"
-    )
-    EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON"
+    EXTRA_CMAKE_ARGS="-DUSE_CUDA_MATH_WHEELS=ON"
     ;;
   11.*)
-    EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF"
+    EXTRA_CMAKE_ARGS="-DUSE_CUDA_MATH_WHEELS=OFF"
     ;;
 esac
 
-export SKBUILD_CMAKE_ARGS="-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS};-DSINGLEGPU=OFF"
+export SKBUILD_CMAKE_ARGS="-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;${EXTRA_CMAKE_ARGS}"
 ./ci/build_wheel.sh "${package_name}" "${package_dir}"
 
 mkdir -p ${package_dir}/final_dist
diff --git a/python/libcuml/CMakeLists.txt b/python/libcuml/CMakeLists.txt
index a7f6ea29e6..2e17266122 100644
--- a/python/libcuml/CMakeLists.txt
+++ b/python/libcuml/CMakeLists.txt
@@ -27,20 +27,7 @@ project(
 
 ################################################################################
 # - User Options  --------------------------------------------------------------
-option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF)
 option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF)
-option(USE_CUVS_WHEEL "Use the cuVS wheel" OFF)
-
-# todo: use CMAKE_MESSAGE_CONTEXT for prefix for logging.
-# https://github.com/rapidsai/cuml/issues/4843
-message(VERBOSE "CUML_PY: Disabling all mnmg components and comms libraries: ${SINGLEGPU}")
-
-# In libcuml wheels, we always want to build in all cuML algorithms.
-# This is the default in cpp/CMakeLists.txt, but just making that choice for wheels explicit here.
-set(CUML_ALGORITHMS "ALL" CACHE STRING "Choose which algorithms are built cuML. Can specify individual algorithms or groups in a semicolon-separated list.")
-
-set(CUML_CPP_TARGET "cuml++")
-set(CUML_CPP_SRC "../../cpp")
 
 ################################################################################
 # - Process User Options  ------------------------------------------------------
@@ -62,17 +49,23 @@ set(BUILD_CUML_C_LIBRARY OFF)
 set(BUILD_CUML_EXAMPLES OFF)
 set(BUILD_CUML_BENCH OFF)
 
+# In libcuml wheels, we always want to build in all cuML algorithms.
+# This is the default in cpp/CMakeLists.txt, but just making that choice for wheels explicit here.
+set(CUML_ALGORITHMS "ALL" CACHE STRING "Choose which algorithms are built cuML. Can specify individual algorithms or groups in a semicolon-separated list.")
+
+# for libcuml wheels, always compile in the multi-node, multi-GPU stuff from cumlprims_mg
+set(SINGLEGPU ON)
+
+set(CUML_CPP_TARGET "cuml++")
+set(CUML_CPP_SRC "../../cpp")
+
 # --- cumlprims_mg --- #
 # ship cumlprims_mg in the 'libcuml' wheel (for re-use by 'cuml' wheels)
 set(CUML_USE_CUMLPRIMS_MG_STATIC OFF)
 set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL OFF)
 
 # --- cuVS --- #
-if(USE_CUVS_WHEEL)
-  set(CUML_USE_CUVS_STATIC OFF)
-else()
-  set(CUML_USE_CUVS_STATIC ON)
-endif()
+set(CUML_USE_CUVS_STATIC OFF)
 
 # don't unclude any cuVS stuff in libcuml wheel
 # (expect anything building against libcuml to provide cuvs headers externally)
@@ -117,7 +110,8 @@ if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS)
   set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH ${rpaths} APPEND)
 endif()
 
-if(USE_CUVS_WHEEL)
+# if dynamically linking to cuVS, set up an RPATH to find the libcuvs.so installed via the cuvs wheel
+if(NOT CUML_USE_CUVS_STATIC)
   # assumes libcuml++ is installed 2 levels deep, e.g. site-packages/cuml/lib64/libcuml++.so
   set(rpaths "$ORIGIN/../../cuvs")
   set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH ${rpaths} APPEND)

From b17f6138e67a24d4d1be2be6852f7145c07f17ab Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Fri, 17 Jan 2025 10:07:50 -0800
Subject: [PATCH 43/48] fix typo in rapids_cuda_init_architectures

---
 python/libcuml/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/libcuml/CMakeLists.txt b/python/libcuml/CMakeLists.txt
index 2e17266122..98c5b17f85 100644
--- a/python/libcuml/CMakeLists.txt
+++ b/python/libcuml/CMakeLists.txt
@@ -17,7 +17,7 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
 include(../../rapids_config.cmake)
 
 include(rapids-cuda)
-rapids_cuda_init_architectures(libcugraph-python)
+rapids_cuda_init_architectures(libcuml-python)
 
 project(
   libcuml-python

From 2623cc99f5ab9bef6e55874f7ea2069b01ecd3fb Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Fri, 17 Jan 2025 10:40:56 -0800
Subject: [PATCH 44/48] fix SINGLEGPU arg

---
 python/libcuml/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/libcuml/CMakeLists.txt b/python/libcuml/CMakeLists.txt
index 98c5b17f85..42aed3e8a7 100644
--- a/python/libcuml/CMakeLists.txt
+++ b/python/libcuml/CMakeLists.txt
@@ -54,7 +54,7 @@ set(BUILD_CUML_BENCH OFF)
 set(CUML_ALGORITHMS "ALL" CACHE STRING "Choose which algorithms are built cuML. Can specify individual algorithms or groups in a semicolon-separated list.")
 
 # for libcuml wheels, always compile in the multi-node, multi-GPU stuff from cumlprims_mg
-set(SINGLEGPU ON)
+set(SINGLEGPU OFF)
 
 set(CUML_CPP_TARGET "cuml++")
 set(CUML_CPP_SRC "../../cpp")

From 77c8f1486e64fe22578e329f23d5d16a2ad5804c Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Fri, 17 Jan 2025 13:47:31 -0800
Subject: [PATCH 45/48] updates

---
 ci/validate_wheel.sh       | 11 +++++++++++
 python/cuml/CMakeLists.txt | 21 +++++----------------
 python/cuml/pyproject.toml |  5 +----
 3 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh
index bcc67af0c7..ca0a34e15e 100755
--- a/ci/validate_wheel.sh
+++ b/ci/validate_wheel.sh
@@ -20,6 +20,17 @@ if [[ "${package_dir}" == "python/libcuml" ]]; then
             --max-allowed-size-compressed '500M'
         )
     fi
+elif [[ "${package_dir}" == "python/cuml" ]]; then
+    # TODO(jameslamb): why are the CUDA 11 wheels so big???
+    if [[ "${RAPIDS_CUDA_MAJOR}" == "11" ]]; then
+        PYDISTCHECK_ARGS+=(
+            --max-allowed-size-compressed '1.5G'
+        )
+    else
+        PYDISTCHECK_ARGS+=(
+            --max-allowed-size-compressed '75M'
+        )
+    fi
 fi
 
 cd "${package_dir}"
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 95502c9d36..18004787b9 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -38,7 +38,6 @@ project(
 option(CUML_UNIVERSAL "Build all cuML Python components." ON)
 option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF)
 option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF)
-option(USE_CUVS_WHEEL "Use the cuVS wheel" OFF)
 option(USE_LIBCUML_WHEEL "Use libcuml wheel to provide some dependencies" OFF)
 
 # todo: use CMAKE_MESSAGE_CONTEXT for prefix for logging.
@@ -104,10 +103,9 @@ else()
     set(CUDA_STATIC_MATH_LIBRARIES ON)
   endif()
 
-  # --- CCCL, RAFT, RMM ---#
-  # find CCCL, RAFT, and RMM before cuVS, to avoid
+  # --- RAFT---#
+  # find RAFT before cuVS, to avoid
   # cuVS CMake defining conflicting versions of targets like 'nvidia::cutlass::cutlass'
-  # include(${CUML_CPP_SRC}/cmake/thirdparty/get_cccl.cmake)
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_raft.cmake)
 
   # --- cuVS --- #
@@ -118,22 +116,16 @@ else()
   #
   # Until then, this is necessary because cuVS is included in the public headers of both
   # libcuml (C++) and cuml (Cython).
-  message(STATUS "--- [debug] finding cuVS for cuML")
   set(CUML_USE_CUVS_STATIC OFF)
   set(CUML_EXCLUDE_CUVS_FROM_ALL ON)
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_cuvs.cmake)
-  message(STATUS "--- [debug] done finding cuVS for cuML")
 
   # --- treelite --- #
-  # We need to call get_treelite explicitly because we need the correct
+  # Need to call get_treelite explicitly because we need the correct
   # ${TREELITE_LIBS} definition for RF.
   #
-  # And because cuml Cython code needs to headers to satisfy calls like 'cdef extern from "treelite/c_api.h"'
-  #
-  # and it needs to come before find_package(cuml), because it's a PUBLIC
-  # dependency of cuml::cuml.
-  #
-  # TODO(jameslamb): clean up these comments
+  # And because cuml Cython code needs the headers to satisfy calls like
+  # 'cdef extern from "treelite/c_api.h"'
 
   # wheel builds use a static treelite, because the 'libtreelite.so' in 'treelite' wheels
   # isn't intended for dynamic linking by third-party projects (e.g. hides its symbols)
@@ -149,13 +141,10 @@ else()
 
   set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
 
-  message(STATUS "--- [debug] finding treelite for cuML")
   include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
 
   # --- libcuml --- #
-  message(STATUS "--- [debug] finding cuML")
   find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
-  message(STATUS "--- [debug] found cuML")
 
   set(cuml_sg_libraries cuml::${CUML_CPP_TARGET})
   set(cuml_mg_libraries cuml::${CUML_CPP_TARGET})
diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml
index 5a96d98874..1c5b0f7c8a 100644
--- a/python/cuml/pyproject.toml
+++ b/python/cuml/pyproject.toml
@@ -21,12 +21,10 @@ requires = [
 
 [tool.pydistcheck]
 select = [
+    # NOTE: size threshold is managed via CLI args in CI scripts
     "distro-too-large-compressed",
 ]
 
-# PyPI limit is 100 MiB, fail CI before we get too close to that
-max_allowed_size_compressed = '75M'
-
 [tool.pytest.ini_options]
 addopts = "--tb=native"
 
@@ -177,7 +175,6 @@ versioneer\.py |
 build-backend = "scikit_build_core.build"
 dependencies-file = "../../dependencies.yaml"
 matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true"
-
 requires = [
     "cmake>=3.26.4,!=3.30.0",
     "cuda-python",

From 3a02bf5e88941065fbf2817df174e86838186cd1 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Tue, 21 Jan 2025 11:45:12 -0800
Subject: [PATCH 46/48] use libcuvs wheels

---
 ci/build_cpp.sh                               |  5 ++-
 ci/build_docs.sh                              |  5 ++-
 ci/build_python.sh                            |  5 ++-
 ci/build_wheel_cuml.sh                        |  3 ++
 ci/build_wheel_libcuml.sh                     |  3 ++
 ci/test_cpp.sh                                |  5 ++-
 ci/test_notebooks.sh                          |  5 ++-
 ci/test_python_common.sh                      |  5 ++-
 ci/test_wheel.sh                              |  3 ++
 ci/use_conda_packages_from_prs.sh             | 10 ++++++
 ci/use_wheels_from_prs.sh                     | 19 ++++++++++
 .../all_cuda-118_arch-x86_64.yaml             |  1 -
 .../all_cuda-125_arch-x86_64.yaml             |  1 -
 .../clang_tidy_cuda-118_arch-x86_64.yaml      |  2 +-
 .../cpp_all_cuda-118_arch-x86_64.yaml         |  2 +-
 .../cpp_all_cuda-125_arch-x86_64.yaml         |  2 +-
 dependencies.yaml                             | 25 +++++++++++--
 python/cuml/CMakeLists.txt                    | 35 -------------------
 python/libcuml/CMakeLists.txt                 | 10 ------
 python/libcuml/libcuml/load.py                | 16 +++++++++
 python/libcuml/pyproject.toml                 |  1 -
 21 files changed, 104 insertions(+), 59 deletions(-)
 create mode 100644 ci/use_conda_packages_from_prs.sh
 create mode 100644 ci/use_wheels_from_prs.sh

diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index fa066c03c6..7c35f35be5 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
@@ -9,6 +9,9 @@ source rapids-configure-sccache
 
 source rapids-date-string
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ci/use_conda_packages_from_prs.sh
+
 export CMAKE_GENERATOR=Ninja
 
 rapids-print-env
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index 05f1f24ee5..51d46ef738 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -1,10 +1,13 @@
 #!/bin/bash
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 set -euo pipefail
 
 rapids-logger "Create test conda environment"
 . /opt/conda/etc/profile.d/conda.sh
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ci/use_conda_packages_from_prs.sh
+
 RAPIDS_VERSION="$(rapids-version)"
 export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
 
diff --git a/ci/build_python.sh b/ci/build_python.sh
index e8d2b3ed39..d0b5724f89 100755
--- a/ci/build_python.sh
+++ b/ci/build_python.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
@@ -9,6 +9,9 @@ source rapids-configure-sccache
 
 source rapids-date-string
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ci/use_conda_packages_from_prs.sh
+
 export CMAKE_GENERATOR=Ninja
 
 rapids-print-env
diff --git a/ci/build_wheel_cuml.sh b/ci/build_wheel_cuml.sh
index 79b8795094..654ffe92a7 100755
--- a/ci/build_wheel_cuml.sh
+++ b/ci/build_wheel_cuml.sh
@@ -6,6 +6,9 @@ set -euo pipefail
 package_name="cuml"
 package_dir="python/cuml"
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ./ci/use_wheels_from_prs.sh
+
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
 # Download the libcuml wheel built in the previous step and make it
diff --git a/ci/build_wheel_libcuml.sh b/ci/build_wheel_libcuml.sh
index ee85aec77b..6a0ef1a8dc 100755
--- a/ci/build_wheel_libcuml.sh
+++ b/ci/build_wheel_libcuml.sh
@@ -6,6 +6,9 @@ set -euo pipefail
 package_name="libcuml"
 package_dir="python/libcuml"
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ./ci/use_wheels_from_prs.sh
+
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
 rapids-logger "Generating build requirements"
diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh
index ea6d1cdc11..5fc351e15b 100755
--- a/ci/test_cpp.sh
+++ b/ci/test_cpp.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
@@ -8,6 +8,9 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../
 
 . /opt/conda/etc/profile.d/conda.sh
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ci/use_conda_packages_from_prs.sh
+
 rapids-logger "Downloading artifacts from previous jobs"
 CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
 
diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh
index d76a754b8b..92c63860e2 100755
--- a/ci/test_notebooks.sh
+++ b/ci/test_notebooks.sh
@@ -1,9 +1,12 @@
 #!/bin/bash
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 set -euo pipefail
 
 . /opt/conda/etc/profile.d/conda.sh
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ci/use_conda_packages_from_prs.sh
+
 rapids-logger "Downloading artifacts from previous jobs"
 CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
 PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh
index 5f1894356c..3a2c772c51 100644
--- a/ci/test_python_common.sh
+++ b/ci/test_python_common.sh
@@ -1,10 +1,13 @@
 #!/bin/bash
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
 . /opt/conda/etc/profile.d/conda.sh
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ci/use_conda_packages_from_prs.sh
+
 rapids-logger "Downloading artifacts from previous jobs"
 CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
 PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh
index 8027876005..7fa04ef5d7 100755
--- a/ci/test_wheel.sh
+++ b/ci/test_wheel.sh
@@ -3,6 +3,9 @@
 
 set -euo pipefail
 
+# TODO(jameslamb): remove this when https://github.com/rapidsai/raft/pull/2531 is merged
+source ./ci/use_wheels_from_prs.sh
+
 mkdir -p ./dist
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist
diff --git a/ci/use_conda_packages_from_prs.sh b/ci/use_conda_packages_from_prs.sh
new file mode 100644
index 0000000000..f5e570ebbb
--- /dev/null
+++ b/ci/use_conda_packages_from_prs.sh
@@ -0,0 +1,10 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+# TODO(jameslamb): remove this file when https://github.com/rapidsai/cuvs/pull/594 is merged
+
+CUVS_COMMIT="0bc1f0a77d46bda91eda6f816ea7b49b797676f9"
+
+CUVS_CPP_CHANNEL=$(rapids-get-pr-conda-artifact cuvs 594 cpp "${RAFT_COMMIT:0:7}")
+CUVS_PYTHON_CHANNEL=$(rapids-get-pr-conda-artifact cuvs 594 python "${RAFT_COMMIT:0:7}")
+
+conda config --system --add channels "${CUVS_CPP_CHANNEL}"
+conda config --system --add channels "${CUVS_PYTHON_CHANNEL}"
diff --git a/ci/use_wheels_from_prs.sh b/ci/use_wheels_from_prs.sh
new file mode 100644
index 0000000000..10175fd0d3
--- /dev/null
+++ b/ci/use_wheels_from_prs.sh
@@ -0,0 +1,19 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+# TODO(jameslamb): remove this file when https://github.com/rapidsai/cuvs/pull/594 is merged
+
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
+
+CUVS_COMMIT="0bc1f0a77d46bda91eda6f816ea7b49b797676f9"
+CUVS_CHANNEL=$(
+  RAPIDS_PY_WHEEL_NAME="cuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact cuvs 594 python "${RAFT_COMMIT:0:7}"
+)
+LIBCUVS_CHANNEL=$(
+  RAPIDS_PY_WHEEL_NAME="libcuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact cuvs 594 cpp "${RAFT_COMMIT:0:7}"
+)
+
+cat > ./constraints.txt <<EOF
+cuvs-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${CUVS_CHANNEL}/cuvs_*.whl)
+libcuvs-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${LIBCUVS_CHANNEL}/libcuvs_*.whl)
+EOF
+
+export PIP_CONSTRAINT=$(pwd)/constraints.txt
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 8d3c668beb..4a89954dbc 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -41,7 +41,6 @@ dependencies:
 - libcusolver=11.4.1.48
 - libcusparse-dev=11.7.5.86
 - libcusparse=11.7.5.86
-- libcuvs==25.2.*,>=0.0.0a0
 - libraft==25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - nbsphinx
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index f83a242a43..bb9753339c 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -38,7 +38,6 @@ dependencies:
 - libcurand-dev
 - libcusolver-dev
 - libcusparse-dev
-- libcuvs==25.2.*,>=0.0.0a0
 - libraft==25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - nbsphinx
diff --git a/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml b/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
index f307fd45e3..01aff21d23 100644
--- a/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
@@ -27,8 +27,8 @@ dependencies:
 - libcusolver=11.4.1.48
 - libcusparse-dev=11.7.5.86
 - libcusparse=11.7.5.86
-- libcuvs==25.2.*,>=0.0.0a0
 - libraft-headers==25.2.*,>=0.0.0a0
+- libraft==25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - ninja
 - nvcc_linux-64=11.8
diff --git a/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml b/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml
index 6220cd8a50..7b2237f7de 100644
--- a/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml
@@ -25,8 +25,8 @@ dependencies:
 - libcusolver=11.4.1.48
 - libcusparse-dev=11.7.5.86
 - libcusparse=11.7.5.86
-- libcuvs==25.2.*,>=0.0.0a0
 - libraft-headers==25.2.*,>=0.0.0a0
+- libraft==25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - ninja
 - nvcc_linux-64=11.8
diff --git a/conda/environments/cpp_all_cuda-125_arch-x86_64.yaml b/conda/environments/cpp_all_cuda-125_arch-x86_64.yaml
index 5b553bc95d..eb059c6e1a 100644
--- a/conda/environments/cpp_all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/cpp_all_cuda-125_arch-x86_64.yaml
@@ -22,8 +22,8 @@ dependencies:
 - libcurand-dev
 - libcusolver-dev
 - libcusparse-dev
-- libcuvs==25.2.*,>=0.0.0a0
 - libraft-headers==25.2.*,>=0.0.0a0
+- libraft==25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - ninja
 - spdlog>=1.14.1,<1.15
diff --git a/dependencies.yaml b/dependencies.yaml
index c37b981445..e8fe0ee114 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -112,6 +112,7 @@ files:
       - depends_on_cuda_python
       - depends_on_libcuml
       - depends_on_libcumlprims
+      - depends_on_libcuvs
       - depends_on_libraft
       - depends_on_librmm
       - depends_on_pylibraft
@@ -157,6 +158,7 @@ files:
       key: requires
     includes:
       - common_build
+      - depends_on_libcuvs
       - depends_on_libraft
       - depends_on_librmm
   py_run_libcuml:
@@ -166,7 +168,7 @@ files:
       table: project
     includes:
       - cuda_wheels
-      - depends_on_cuvs
+      - depends_on_libcuvs
       - depends_on_libraft
 channels:
   - rapidsai
@@ -640,7 +642,26 @@ dependencies:
     common:
       - output_types: conda
         packages:
-          - libcuvs==25.2.*,>=0.0.0a0
+          - &libcuvs_unsuffixed libraft==25.2.*,>=0.0.0a0
+      - output_types: requirements
+        packages:
+          # pip recognizes the index as a global option for the requirements.txt file
+          - --extra-index-url=https://pypi.nvidia.com
+          - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple
+    specific:
+      - output_types: [requirements, pyproject]
+        matrices:
+          - matrix:
+              cuda: "12.*"
+              cuda_suffixed: "true"
+            packages:
+              - libcuvs-cu12==25.2.*,>=0.0.0a0
+          - matrix:
+              cuda: "11.*"
+              cuda_suffixed: "true"
+            packages:
+              - libcuvs-cu11==25.2.*,>=0.0.0a0
+          - {matrix: null, packages: [*libcuvs_unsuffixed]}
   depends_on_libraft:
     common:
       - output_types: conda
diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt
index 18004787b9..53f6b19c53 100644
--- a/python/cuml/CMakeLists.txt
+++ b/python/cuml/CMakeLists.txt
@@ -85,41 +85,6 @@ else()
   include(rapids-export)
   rapids_cpm_init()
 
-  # --- CUDA --- #
-  set(CUDA_STATIC_RUNTIME ON)
-
-  # Link to the CUDA wheels with shared libraries for CUDA 12+
-  #
-  # This is here because we're rebuilding cuVS below...without it, cuVS on CUDA 11
-  # dynamically links to CUDA math libs (cuBLAS, cuFFT, etc.), and then
-  # that linkage results in those libraries being vendored in wheels by auditwheel.
-  find_package(CUDAToolkit REQUIRED)
-  if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0)
-    set(CUDA_STATIC_MATH_LIBRARIES OFF)
-  else()
-    if(USE_CUDA_MATH_WHEELS)
-      message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0")
-    endif()
-    set(CUDA_STATIC_MATH_LIBRARIES ON)
-  endif()
-
-  # --- RAFT---#
-  # find RAFT before cuVS, to avoid
-  # cuVS CMake defining conflicting versions of targets like 'nvidia::cutlass::cutlass'
-  include(${CUML_CPP_SRC}/cmake/thirdparty/get_raft.cmake)
-
-  # --- cuVS --- #
-  # Once there are 'libcuvs' wheels, it should be possible to remove this CPM build of cuvs.
-  #
-  #   * conda builds will find 'libcuvs' in the build environment
-  #   * wheel builds will find 'libcuvs' wherever that wheel is installed
-  #
-  # Until then, this is necessary because cuVS is included in the public headers of both
-  # libcuml (C++) and cuml (Cython).
-  set(CUML_USE_CUVS_STATIC OFF)
-  set(CUML_EXCLUDE_CUVS_FROM_ALL ON)
-  include(${CUML_CPP_SRC}/cmake/thirdparty/get_cuvs.cmake)
-
   # --- treelite --- #
   # Need to call get_treelite explicitly because we need the correct
   # ${TREELITE_LIBS} definition for RF.
diff --git a/python/libcuml/CMakeLists.txt b/python/libcuml/CMakeLists.txt
index 42aed3e8a7..44f1acd3ec 100644
--- a/python/libcuml/CMakeLists.txt
+++ b/python/libcuml/CMakeLists.txt
@@ -66,9 +66,6 @@ set(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL OFF)
 
 # --- cuVS --- #
 set(CUML_USE_CUVS_STATIC OFF)
-
-# don't unclude any cuVS stuff in libcuml wheel
-# (expect anything building against libcuml to provide cuvs headers externally)
 set(CUML_EXCLUDE_CUVS_FROM_ALL ON)
 
 # --- raft --- #
@@ -109,10 +106,3 @@ if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS)
   )
   set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH ${rpaths} APPEND)
 endif()
-
-# if dynamically linking to cuVS, set up an RPATH to find the libcuvs.so installed via the cuvs wheel
-if(NOT CUML_USE_CUVS_STATIC)
-  # assumes libcuml++ is installed 2 levels deep, e.g. site-packages/cuml/lib64/libcuml++.so
-  set(rpaths "$ORIGIN/../../cuvs")
-  set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH ${rpaths} APPEND)
-endif()
diff --git a/python/libcuml/libcuml/load.py b/python/libcuml/libcuml/load.py
index 9e85ba2e94..af7ce19aec 100644
--- a/python/libcuml/libcuml/load.py
+++ b/python/libcuml/libcuml/load.py
@@ -67,6 +67,22 @@ def load_library():
         # the loader can find it.
         pass
 
+    try:
+        # libcuvs must be loaded before libcuml++ because libcuml++
+        # references its symbols
+        import libcuvs
+
+        libcuvs.load_library()
+    except ModuleNotFoundError:
+        # 'libcuml++' has a runtime dependency on 'libcuvs'. However,
+        # that dependency might be satisfied by the 'libcuvs' conda package
+        # (which does not have any Python modules), instead of the
+        # 'libcuvs' wheel.
+        #
+        # In that situation, assume that 'libcuvs.so' is in a place where
+        # the loader can find it.
+        pass
+
     prefer_system_installation = (
         os.getenv("RAPIDS_LIBCUML_PREFER_SYSTEM_LIBRARY", "false").lower()
         != "false"
diff --git a/python/libcuml/pyproject.toml b/python/libcuml/pyproject.toml
index 0caace57f4..e900d11659 100644
--- a/python/libcuml/pyproject.toml
+++ b/python/libcuml/pyproject.toml
@@ -37,7 +37,6 @@ classifiers = [
     "Environment :: GPU :: NVIDIA CUDA",
 ]
 dependencies = [
-    "cuvs==25.2.*,>=0.0.0a0",
     "libraft==25.2.*,>=0.0.0a0",
     "nvidia-cublas",
     "nvidia-cufft",

From bd8de885b2424bf63b2d32d963f86ca0fa2cf6a3 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Tue, 21 Jan 2025 12:23:24 -0800
Subject: [PATCH 47/48] CUVS_COMMIT

---
 ci/use_conda_packages_from_prs.sh | 4 ++--
 ci/use_wheels_from_prs.sh         | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ci/use_conda_packages_from_prs.sh b/ci/use_conda_packages_from_prs.sh
index f5e570ebbb..3f0c681780 100644
--- a/ci/use_conda_packages_from_prs.sh
+++ b/ci/use_conda_packages_from_prs.sh
@@ -3,8 +3,8 @@
 
 CUVS_COMMIT="0bc1f0a77d46bda91eda6f816ea7b49b797676f9"
 
-CUVS_CPP_CHANNEL=$(rapids-get-pr-conda-artifact cuvs 594 cpp "${RAFT_COMMIT:0:7}")
-CUVS_PYTHON_CHANNEL=$(rapids-get-pr-conda-artifact cuvs 594 python "${RAFT_COMMIT:0:7}")
+CUVS_CPP_CHANNEL=$(rapids-get-pr-conda-artifact cuvs 594 cpp "${CUVS_COMMIT:0:7}")
+CUVS_PYTHON_CHANNEL=$(rapids-get-pr-conda-artifact cuvs 594 python "${CUVS_COMMIT:0:7}")
 
 conda config --system --add channels "${CUVS_CPP_CHANNEL}"
 conda config --system --add channels "${CUVS_PYTHON_CHANNEL}"
diff --git a/ci/use_wheels_from_prs.sh b/ci/use_wheels_from_prs.sh
index 10175fd0d3..02baf98e4a 100644
--- a/ci/use_wheels_from_prs.sh
+++ b/ci/use_wheels_from_prs.sh
@@ -5,10 +5,10 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
 CUVS_COMMIT="0bc1f0a77d46bda91eda6f816ea7b49b797676f9"
 CUVS_CHANNEL=$(
-  RAPIDS_PY_WHEEL_NAME="cuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact cuvs 594 python "${RAFT_COMMIT:0:7}"
+  RAPIDS_PY_WHEEL_NAME="cuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact cuvs 594 python "${CUVS_COMMIT:0:7}"
 )
 LIBCUVS_CHANNEL=$(
-  RAPIDS_PY_WHEEL_NAME="libcuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact cuvs 594 cpp "${RAFT_COMMIT:0:7}"
+  RAPIDS_PY_WHEEL_NAME="libcuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-wheel-artifact cuvs 594 cpp "${CUVS_COMMIT:0:7}"
 )
 
 cat > ./constraints.txt <<EOF

From 610970aef6e01c8a4c33f1950172b09c61b440e9 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Tue, 21 Jan 2025 12:36:41 -0800
Subject: [PATCH 48/48] fix depends_on_libcuvs

---
 conda/environments/all_cuda-118_arch-x86_64.yaml        | 1 +
 conda/environments/all_cuda-125_arch-x86_64.yaml        | 1 +
 conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml | 2 +-
 conda/environments/cpp_all_cuda-118_arch-x86_64.yaml    | 2 +-
 conda/environments/cpp_all_cuda-125_arch-x86_64.yaml    | 2 +-
 dependencies.yaml                                       | 2 +-
 python/cuml/pyproject.toml                              | 1 +
 python/libcuml/pyproject.toml                           | 2 ++
 8 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 4a89954dbc..8d3c668beb 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -41,6 +41,7 @@ dependencies:
 - libcusolver=11.4.1.48
 - libcusparse-dev=11.7.5.86
 - libcusparse=11.7.5.86
+- libcuvs==25.2.*,>=0.0.0a0
 - libraft==25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - nbsphinx
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index bb9753339c..f83a242a43 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -38,6 +38,7 @@ dependencies:
 - libcurand-dev
 - libcusolver-dev
 - libcusparse-dev
+- libcuvs==25.2.*,>=0.0.0a0
 - libraft==25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - nbsphinx
diff --git a/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml b/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
index 01aff21d23..f307fd45e3 100644
--- a/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
@@ -27,8 +27,8 @@ dependencies:
 - libcusolver=11.4.1.48
 - libcusparse-dev=11.7.5.86
 - libcusparse=11.7.5.86
+- libcuvs==25.2.*,>=0.0.0a0
 - libraft-headers==25.2.*,>=0.0.0a0
-- libraft==25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - ninja
 - nvcc_linux-64=11.8
diff --git a/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml b/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml
index 7b2237f7de..6220cd8a50 100644
--- a/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml
@@ -25,8 +25,8 @@ dependencies:
 - libcusolver=11.4.1.48
 - libcusparse-dev=11.7.5.86
 - libcusparse=11.7.5.86
+- libcuvs==25.2.*,>=0.0.0a0
 - libraft-headers==25.2.*,>=0.0.0a0
-- libraft==25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - ninja
 - nvcc_linux-64=11.8
diff --git a/conda/environments/cpp_all_cuda-125_arch-x86_64.yaml b/conda/environments/cpp_all_cuda-125_arch-x86_64.yaml
index eb059c6e1a..5b553bc95d 100644
--- a/conda/environments/cpp_all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/cpp_all_cuda-125_arch-x86_64.yaml
@@ -22,8 +22,8 @@ dependencies:
 - libcurand-dev
 - libcusolver-dev
 - libcusparse-dev
+- libcuvs==25.2.*,>=0.0.0a0
 - libraft-headers==25.2.*,>=0.0.0a0
-- libraft==25.2.*,>=0.0.0a0
 - librmm==25.2.*,>=0.0.0a0
 - ninja
 - spdlog>=1.14.1,<1.15
diff --git a/dependencies.yaml b/dependencies.yaml
index e8fe0ee114..823ba2ceb1 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -642,7 +642,7 @@ dependencies:
     common:
       - output_types: conda
         packages:
-          - &libcuvs_unsuffixed libraft==25.2.*,>=0.0.0a0
+          - &libcuvs_unsuffixed libcuvs==25.2.*,>=0.0.0a0
       - output_types: requirements
         packages:
           # pip recognizes the index as a global option for the requirements.txt file
diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml
index 1c5b0f7c8a..7d2875b0b2 100644
--- a/python/cuml/pyproject.toml
+++ b/python/cuml/pyproject.toml
@@ -180,6 +180,7 @@ requires = [
     "cuda-python",
     "cython>=3.0.0",
     "libcuml==25.2.*,>=0.0.0a0",
+    "libcuvs==25.2.*,>=0.0.0a0",
     "libraft==25.2.*,>=0.0.0a0",
     "librmm==25.2.*,>=0.0.0a0",
     "ninja",
diff --git a/python/libcuml/pyproject.toml b/python/libcuml/pyproject.toml
index e900d11659..11dec52394 100644
--- a/python/libcuml/pyproject.toml
+++ b/python/libcuml/pyproject.toml
@@ -37,6 +37,7 @@ classifiers = [
     "Environment :: GPU :: NVIDIA CUDA",
 ]
 dependencies = [
+    "libcuvs==25.2.*,>=0.0.0a0",
     "libraft==25.2.*,>=0.0.0a0",
     "nvidia-cublas",
     "nvidia-cufft",
@@ -79,6 +80,7 @@ dependencies-file = "../../dependencies.yaml"
 matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true"
 requires = [
     "cmake>=3.26.4,!=3.30.0",
+    "libcuvs==25.2.*,>=0.0.0a0",
     "libraft==25.2.*,>=0.0.0a0",
     "librmm==25.2.*,>=0.0.0a0",
     "ninja",