From 2ec4fd23bbbeb596826c499a457aff5659084ae6 Mon Sep 17 00:00:00 2001 From: Romain Cledat Date: Fri, 6 Dec 2024 12:06:39 -0800 Subject: [PATCH 1/5] Update from upstream: - Compatibility with configs - Split out parsing --- .../cmd/environment/environment_cmd.py | 195 +----------- .../plugins/conda/conda_step_decorator.py | 4 +- .../netflix_ext/plugins/conda/parsers.py | 283 ++++++++++++++++++ 3 files changed, 295 insertions(+), 187 deletions(-) create mode 100644 metaflow_extensions/netflix_ext/plugins/conda/parsers.py diff --git a/metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py b/metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py index 899343a..c36f7ca 100644 --- a/metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py +++ b/metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py @@ -1,7 +1,6 @@ import json import os import platform -import re import shutil import subprocess import tempfile @@ -22,8 +21,6 @@ from metaflow.metaflow_config import ( CONDA_ALL_ARCHS, CONDA_DEPENDENCY_RESOLVER, - CONDA_TEST, - CONDA_SYS_DEPENDENCIES, DEFAULT_DATASTORE, DEFAULT_METADATA, get_pinned_conda_libs, @@ -39,8 +36,13 @@ ResolvedEnvironment, env_type_for_deps, ) -from metaflow_extensions.netflix_ext.plugins.conda.envsresolver import EnvsResolver -from metaflow_extensions.netflix_ext.plugins.conda.utils import ( +from metaflow_extensions.nflx.plugins.new_conda.envsresolver import EnvsResolver + +from metaflow_extensions.nflx.plugins.new_conda.parsers import ( + parse_req_value, + parse_yml_value, +) +from metaflow_extensions.nflx.plugins.new_conda.utils import ( AliasType, arch_id, channel_or_url, @@ -52,24 +54,12 @@ tstr_to_dict, ) -from metaflow._vendor.packaging.requirements import InvalidRequirement, Requirement from metaflow._vendor.packaging.specifiers import SpecifierSet from metaflow._vendor.packaging.utils import canonicalize_version from .utils import download_mf_version -REQ_SPLIT_LINE = re.compile(r"([^~<=>]*)([~<=>]+.*)?") - -# Allows things like: -# pkg = <= version -# pkg <= version -# pkg = version -# pkg = ==version or pkg = =version -# In other words, the = is optional but possible -YML_SPLIT_LINE = re.compile(r"(?:=\s)?(<=|>=|~=|==|<|>|=)") - - class CommandObj: def __init__(self): pass @@ -1110,92 +1100,8 @@ def _parse_req_file( np_deps: Dict[str, str], sys_deps: Dict[str, str], ) -> Optional[str]: - python_version = None with open(file_name, mode="r", encoding="utf-8") as f: - for line in f: - line = line.strip() - if not line: - continue - splits = line.split(maxsplit=1) - first_word = splits[0] - if len(splits) > 1: - rem = splits[1] - else: - rem = None - if first_word in ("-i", "--index-url"): - raise InvalidEnvironmentException( - "To specify a base PYPI index, set `METAFLOW_CONDA_DEFAULT_PYPI_SOURCE; " - "you can specify additional indices using --extra-index-url" - ) - elif first_word == "--extra-index-url" and rem: - sources.setdefault("pypi", []).append(rem) - elif first_word in ("-f", "--find-links", "--trusted-host") and rem: - extra_args.setdefault("pypi", []).append(" ".join([first_word, rem])) - elif first_word in ("--pre", "--no-index"): - extra_args.setdefault("pypi", []).append(first_word) - elif first_word == "--conda-channel" and rem: - sources.setdefault("conda", []).append(rem) - elif first_word == "--conda-pkg": - # Special extension to allow non-python conda package specification - split_res = REQ_SPLIT_LINE.match(splits[1]) - if split_res is None: - raise InvalidEnvironmentException( - "Could not parse conda package '%s'" % splits[1] - ) - s = split_res.groups() - if s[1] is None: - np_deps[s[0].replace(" ", "")] = "" - else: - np_deps[s[0].replace(" ", "")] = s[1].replace(" ", "").lstrip("=") - elif first_word == "--sys-pkg": - # Special extension to allow the specification of system dependencies - # (currently __cuda and __glibc) - split_res = REQ_SPLIT_LINE.match(splits[1]) - if split_res is None: - raise InvalidEnvironmentException( - "Could not parse system package '%s'" % splits[1] - ) - s = split_res.groups() - pkg_name = s[0].replace(" ", "") - if pkg_name not in CONDA_SYS_DEPENDENCIES: - raise InvalidEnvironmentException( - "System package '%s' not allowed. Values allowed are: %s" - % (pkg_name, str(CONDA_SYS_DEPENDENCIES)) - ) - if s[1] is None: - raise InvalidEnvironmentException( - "System package '%s' requires a version" % pkg_name - ) - sys_deps[pkg_name] = s[1].replace(" ", "").lstrip("=") - elif first_word.startswith("#"): - continue - elif first_word.startswith("-"): - raise InvalidEnvironmentException( - "'%s' is not a supported line in a requirements.txt" % line - ) - else: - try: - parsed_req = Requirement(line) - except InvalidRequirement as ex: - raise InvalidEnvironmentException( - "Could not parse '%s'" % line - ) from ex - if parsed_req.marker is not None: - raise InvalidEnvironmentException( - "Environment markers are not supported for '%s'" % line - ) - dep_name = parsed_req.name - if parsed_req.extras: - dep_name += "[%s]" % ",".join(parsed_req.extras) - if parsed_req.url: - dep_name += "@%s" % parsed_req.url - specifier = str(parsed_req.specifier).lstrip(" =") - if dep_name == "python": - if specifier: - python_version = specifier - else: - deps[dep_name] = specifier - return python_version + return parse_req_value(f.read(), extra_args, sources, deps, np_deps, sys_deps) def _parse_yml_file( @@ -1206,91 +1112,8 @@ def _parse_yml_file( pypi_deps: Dict[str, str], sys_deps: Dict[str, str], ) -> Optional[str]: - python_version = None # type: Optional[str] with open(file_name, mode="r", encoding="utf-8") as f: - # Very poor man's yaml parsing - mode = None - for line in f: - if not line: - continue - elif line[0] not in (" ", "-"): - line = line.strip() - if line == "channels:": - mode = "sources" - elif line == "dependencies:": - mode = "deps" - elif line == "pypi-indices:": - mode = "pypi_sources" - else: - mode = "ignore" - elif mode and mode.endswith("sources"): - line = line.lstrip(" -").rstrip() - sources.setdefault("conda" if mode == "sources" else "pypi", []).append( - line - ) - elif mode and mode.endswith("deps"): - line = line.lstrip(" -").rstrip() - if line == "pip:": - mode = "pypi_deps" - elif line == "sys:": - mode = "sys_deps" - else: - to_update = ( - conda_deps - if mode == "deps" - else pypi_deps if mode == "pypi_deps" else sys_deps - ) - splits = YML_SPLIT_LINE.split(line.replace(" ", ""), maxsplit=1) - if len(splits) == 1: - if splits[0] != "python": - if mode == "sys_deps": - raise InvalidEnvironmentException( - "System package '%s' requires a version" % splits[0] - ) - to_update[splits[0]] = "" - else: - dep_name, dep_operator, dep_version = splits - if dep_operator not in ("=", "=="): - if mode == "sys_deps": - raise InvalidEnvironmentException( - "System package '%s' requires a specific version not '%s'" - % (splits[0], dep_operator + dep_version) - ) - dep_version = dep_operator + dep_version - if dep_name == "python": - if dep_version: - if python_version: - raise InvalidEnvironmentException( - "Python versions specified multiple times in " - "the YAML file." - ) - python_version = dep_version - else: - if ( - dep_name.startswith("/") - or dep_name.startswith("git+") - or dep_name.startswith("https://") - or dep_name.startswith("ssh://") - ): - # Handle the case where only the URL is specified - # without a package name - depname_and_maybe_tag = dep_name.split("/")[-1] - depname = depname_and_maybe_tag.split("@")[0] - if depname.endswith(".git"): - depname = depname[:-4] - dep_name = "%s@%s" % (depname, dep_name) - - if ( - mode == "sys_deps" - and dep_name not in CONDA_SYS_DEPENDENCIES - ): - raise InvalidEnvironmentException( - "System package '%s' not allowed. Values allowed are: %s" - % (dep_name, str(CONDA_SYS_DEPENDENCIES)) - ) - to_update[dep_name] = dep_version - - return python_version + return parse_yml_value(f.read(), {}, sources, conda_deps, pypi_deps, sys_deps) # @environment.command(help="List resolved environments for a set of dependencies") diff --git a/metaflow_extensions/netflix_ext/plugins/conda/conda_step_decorator.py b/metaflow_extensions/netflix_ext/plugins/conda/conda_step_decorator.py index ba40167..d861672 100644 --- a/metaflow_extensions/netflix_ext/plugins/conda/conda_step_decorator.py +++ b/metaflow_extensions/netflix_ext/plugins/conda/conda_step_decorator.py @@ -465,7 +465,9 @@ def runtime_task_created( parent_ds = self._flow_datastore.get_task_datastore( run_id, step_name, task_id ) - for var, _ in self._flow._get_parameters(): + for var, param in self._flow._get_parameters(): + if param.IS_CONFIG_PARAMETER: + continue self._env_for_fetch[ "METAFLOW_INIT_%s" % var.upper().replace("-", "_") ] = lambda _param=getattr( diff --git a/metaflow_extensions/netflix_ext/plugins/conda/parsers.py b/metaflow_extensions/netflix_ext/plugins/conda/parsers.py new file mode 100644 index 0000000..c273527 --- /dev/null +++ b/metaflow_extensions/netflix_ext/plugins/conda/parsers.py @@ -0,0 +1,283 @@ +import re +from typing import Any, Dict, List, Optional + +from metaflow.metaflow_config import CONDA_SYS_DEPENDENCIES +from metaflow._vendor.packaging.requirements import InvalidRequirement, Requirement + +from .conda import InvalidEnvironmentException + + +REQ_SPLIT_LINE = re.compile(r"([^~<=>]*)([~<=>]+.*)?") + +# Allows things like: +# pkg = <= version +# pkg <= version +# pkg = version +# pkg = ==version or pkg = =version +# In other words, the = is optional but possible +YML_SPLIT_LINE = re.compile(r"(?:=\s)?(<=|>=|~=|==|<|>|=)") + + +def req_parser(config_value: str) -> Dict[str, Any]: + extra_args = {} + sources = {} + deps = {} + np_deps = {} + sys_deps = {} + python_version = parse_req_value( + config_value, extra_args, sources, deps, np_deps, sys_deps + ) + result = {} + if python_version: + result["python"] = python_version + + if extra_args: + raise InvalidEnvironmentException( + "Additional arguments are not supported when parsing requirements.txt for " + "the pypi decorator -- use a named environment instead" + ) + if np_deps: + raise InvalidEnvironmentException( + "Non-python dependencies are not supported when parsing requirements.txt for " + "the pypi decorator -- use a named environment instead" + ) + if sys_deps: + raise InvalidEnvironmentException( + "System dependencies are not supported when parsing requirements.txt for " + "the pypi decorator -- use a named environment instead" + ) + + if "pypi" in sources: + result["extra_indices"] = sources["pypi"] + del sources["pypi"] + if len(sources): + raise InvalidEnvironmentException( + "Only PYPI sources are allowed in requirements.txt" + ) + + result["packages"] = deps + + return result + + +def yml_parser(config_value: str) -> Dict[str, Any]: + sources = {} + conda_deps = {} + pypi_deps = {} + sys_deps = {} + python_version = parse_yml_value( + config_value, {}, sources, conda_deps, pypi_deps, sys_deps + ) + result = {} + if sys_deps: + raise InvalidEnvironmentException( + "System dependencies are not supported when parsing environment.yml for " + "the conda decorator -- use a named environment instead" + ) + + if python_version: + result["python"] = python_version + + if "conda" in sources: + result["channels"] = sources["conda"] + del sources["conda"] + if "pypi" in sources: + result["pip_sources"] = sources["pypi"] + del sources["pypi"] + if len(sources): + raise InvalidEnvironmentException( + "Only conda and Pypi sources are allowed in the YAML file" + ) + + if len(conda_deps): + result["libraries"] = conda_deps + if len(pypi_deps): + result["pip_packages"] = pypi_deps + + return result + + +def parse_req_value( + file_content: str, + extra_args: Dict[str, List[str]], + sources: Dict[str, List[str]], + deps: Dict[str, str], + np_deps: Dict[str, str], + sys_deps: Dict[str, str], +) -> Optional[str]: + python_version = None + for line in file_content.splitlines(): + line = line.strip() + if not line: + continue + splits = line.split(maxsplit=1) + first_word = splits[0] + if len(splits) > 1: + rem = splits[1] + else: + rem = None + if first_word in ("-i", "--index-url"): + raise InvalidEnvironmentException( + "To specify a base PYPI index, set `METAFLOW_CONDA_DEFAULT_PYPI_SOURCE; " + "you can specify additional indices using --extra-index-url" + ) + elif first_word == "--extra-index-url" and rem: + sources.setdefault("pypi", []).append(rem) + elif first_word in ("-f", "--find-links", "--trusted-host") and rem: + extra_args.setdefault("pypi", []).append(" ".join([first_word, rem])) + elif first_word in ("--pre", "--no-index"): + extra_args.setdefault("pypi", []).append(first_word) + elif first_word == "--conda-channel" and rem: + sources.setdefault("conda", []).append(rem) + elif first_word == "--conda-pkg": + # Special extension to allow non-python conda package specification + split_res = REQ_SPLIT_LINE.match(splits[1]) + if split_res is None: + raise InvalidEnvironmentException( + "Could not parse conda package '%s'" % splits[1] + ) + s = split_res.groups() + if s[1] is None: + np_deps[s[0].replace(" ", "")] = "" + else: + np_deps[s[0].replace(" ", "")] = s[1].replace(" ", "").lstrip("=") + elif first_word == "--sys-pkg": + # Special extension to allow the specification of system dependencies + # (currently __cuda and __glibc) + split_res = REQ_SPLIT_LINE.match(splits[1]) + if split_res is None: + raise InvalidEnvironmentException( + "Could not parse system package '%s'" % splits[1] + ) + s = split_res.groups() + pkg_name = s[0].replace(" ", "") + if pkg_name not in CONDA_SYS_DEPENDENCIES: + raise InvalidEnvironmentException( + "System package '%s' not allowed. Values allowed are: %s" + % (pkg_name, str(CONDA_SYS_DEPENDENCIES)) + ) + if s[1] is None: + raise InvalidEnvironmentException( + "System package '%s' requires a version" % pkg_name + ) + sys_deps[pkg_name] = s[1].replace(" ", "").lstrip("=") + elif first_word.startswith("#"): + continue + elif first_word.startswith("-"): + raise InvalidEnvironmentException( + "'%s' is not a supported line in a requirements.txt" % line + ) + else: + try: + parsed_req = Requirement(line) + except InvalidRequirement as ex: + raise InvalidEnvironmentException("Could not parse '%s'" % line) from ex + if parsed_req.marker is not None: + raise InvalidEnvironmentException( + "Environment markers are not supported for '%s'" % line + ) + dep_name = parsed_req.name + if parsed_req.extras: + dep_name += "[%s]" % ",".join(parsed_req.extras) + if parsed_req.url: + dep_name += "@%s" % parsed_req.url + specifier = str(parsed_req.specifier).lstrip(" =") + if dep_name == "python": + if specifier: + python_version = specifier + else: + deps[dep_name] = specifier + return python_version + + +def parse_yml_value( + file_content: str, + _: Dict[str, List[str]], + sources: Dict[str, List[str]], + conda_deps: Dict[str, str], + pypi_deps: Dict[str, str], + sys_deps: Dict[str, str], +) -> Optional[str]: + python_version = None # type: Optional[str] + mode = None + for line in file_content.splitlines(): + if not line: + continue + elif line[0] not in (" ", "-"): + line = line.strip() + if line == "channels:": + mode = "sources" + elif line == "dependencies:": + mode = "deps" + elif line == "pypi-indices:": + mode = "pypi_sources" + else: + mode = "ignore" + elif mode and mode.endswith("sources"): + line = line.lstrip(" -").rstrip() + sources.setdefault("conda" if mode == "sources" else "pypi", []).append( + line + ) + elif mode and mode.endswith("deps"): + line = line.lstrip(" -").rstrip() + if line == "pip:": + mode = "pypi_deps" + elif line == "sys:": + mode = "sys_deps" + else: + to_update = ( + conda_deps + if mode == "deps" + else pypi_deps if mode == "pypi_deps" else sys_deps + ) + splits = YML_SPLIT_LINE.split(line.replace(" ", ""), maxsplit=1) + if len(splits) == 1: + if splits[0] != "python": + if mode == "sys_deps": + raise InvalidEnvironmentException( + "System package '%s' requires a version" % splits[0] + ) + to_update[splits[0]] = "" + else: + dep_name, dep_operator, dep_version = splits + if dep_operator not in ("=", "=="): + if mode == "sys_deps": + raise InvalidEnvironmentException( + "System package '%s' requires a specific version not '%s'" + % (splits[0], dep_operator + dep_version) + ) + dep_version = dep_operator + dep_version + if dep_name == "python": + if dep_version: + if python_version: + raise InvalidEnvironmentException( + "Python versions specified multiple times in " + "the YAML file." + ) + python_version = dep_version + else: + if ( + dep_name.startswith("/") + or dep_name.startswith("git+") + or dep_name.startswith("https://") + or dep_name.startswith("ssh://") + ): + # Handle the case where only the URL is specified + # without a package name + depname_and_maybe_tag = dep_name.split("/")[-1] + depname = depname_and_maybe_tag.split("@")[0] + if depname.endswith(".git"): + depname = depname[:-4] + dep_name = "%s@%s" % (depname, dep_name) + + if ( + mode == "sys_deps" + and dep_name not in CONDA_SYS_DEPENDENCIES + ): + raise InvalidEnvironmentException( + "System package '%s' not allowed. Values allowed are: %s" + % (dep_name, str(CONDA_SYS_DEPENDENCIES)) + ) + to_update[dep_name] = dep_version + + return python_version From 6c9b1732e13d1cc73a99c58a6cb7e3c4b04b8026 Mon Sep 17 00:00:00 2001 From: Romain Cledat Date: Fri, 6 Dec 2024 12:11:24 -0800 Subject: [PATCH 2/5] Add import of parsers at top level --- .../netflix_ext/toplevel/netflixext_toplevel.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/metaflow_extensions/netflix_ext/toplevel/netflixext_toplevel.py b/metaflow_extensions/netflix_ext/toplevel/netflixext_toplevel.py index bd1e1fc..ce03323 100644 --- a/metaflow_extensions/netflix_ext/toplevel/netflixext_toplevel.py +++ b/metaflow_extensions/netflix_ext/toplevel/netflixext_toplevel.py @@ -1,4 +1,7 @@ from .netflixext_version import netflixext_version +# Parsers +from ..plugins.conda.parsers import req_parser, yml_parser + __mf_extensions__ = "netflix-ext" __version__ = netflixext_version From aadce54d7dc9d7d3dd9211eed61009f2109a5f15 Mon Sep 17 00:00:00 2001 From: Romain Cledat Date: Tue, 10 Dec 2024 01:19:04 -0800 Subject: [PATCH 3/5] Fixes and more compatibility with Mamba 2.0 --- .../cmd/debug/debug_stub_generator.py | 7 ++++- .../cmd/environment/environment_cmd.py | 6 ++-- .../netflix_ext/plugins/conda/conda.py | 29 +++++++++++++++---- .../netflix_ext/plugins/conda/utils.py | 4 +++ 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/metaflow_extensions/netflix_ext/cmd/debug/debug_stub_generator.py b/metaflow_extensions/netflix_ext/cmd/debug/debug_stub_generator.py index 37ab268..ababa99 100644 --- a/metaflow_extensions/netflix_ext/cmd/debug/debug_stub_generator.py +++ b/metaflow_extensions/netflix_ext/cmd/debug/debug_stub_generator.py @@ -21,7 +21,12 @@ def __init__(self, task_pathspec): self.run_id = self.task.parent.parent.id self.flow_name = self.task.parent.parent.parent.id self.is_new_conda_step = self.is_new_conda_step() - self.workflow_dag = self.task["_graph_info"].data + + parameters_task = Step( + "%s/_parameters" % self.task.parent.pathspec, _namespace_check=False + ).task + + self.workflow_dag = parameters_task["_graph_info"].data self.file_name = self.workflow_dag["file"] self._dag_structure = self.get_dag_structure(self.workflow_dag["steps"]) self.step_type = self.get_step_type(self.step_name) diff --git a/metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py b/metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py index c36f7ca..b9a8528 100644 --- a/metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py +++ b/metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py @@ -36,13 +36,13 @@ ResolvedEnvironment, env_type_for_deps, ) -from metaflow_extensions.nflx.plugins.new_conda.envsresolver import EnvsResolver +from metaflow_extensions.netflix_ext.plugins.conda.envsresolver import EnvsResolver -from metaflow_extensions.nflx.plugins.new_conda.parsers import ( +from metaflow_extensions.netflix_ext.plugins.conda.parsers import ( parse_req_value, parse_yml_value, ) -from metaflow_extensions.nflx.plugins.new_conda.utils import ( +from metaflow_extensions.netflix_ext.plugins.conda.utils import ( AliasType, arch_id, channel_or_url, diff --git a/metaflow_extensions/netflix_ext/plugins/conda/conda.py b/metaflow_extensions/netflix_ext/plugins/conda/conda.py index ec2093a..51760ed 100644 --- a/metaflow_extensions/netflix_ext/plugins/conda/conda.py +++ b/metaflow_extensions/netflix_ext/plugins/conda/conda.py @@ -267,17 +267,36 @@ def call_conda( raise InvalidEnvironmentException("Binary '%s' unknown" % binary) try: env = {"CONDA_JSON": "True"} + initial_command = args[0] if args else None if self._conda_executable_type == "mamba": env.update({"MAMBA_NO_BANNER": "1", "MAMBA_JSON": "True"}) - if addl_env: - env.update(addl_env) - if ( - args - and args[0] not in ("package", "info") + self._mode == "local" + and CONDA_LOCAL_PATH and (self.is_non_conda_exec or binary == "micromamba") ): + # In this case, we need to prepend some options to the arguments + # to ensure that it uses CONDA_LOCAL_PATH and not the system one + args = [ + "--no-env", + "--rc-file", + os.path.join(CONDA_LOCAL_PATH, ".mambarc"), + "-r", + CONDA_LOCAL_PATH, + ] + args + if initial_command and initial_command not in ("package", "info"): + args.append("--json") + elif ( + (self._conda_executable_type == "micromamba" or binary == "micromamba") + and initial_command + and initial_command not in ("package", "info") + ): + # This is in the remote case. args.extend(["-r", self.root_prefix, "--json"]) + + if addl_env: + env.update(addl_env) + debug.conda_exec("Conda call: %s" % str([self._bins[binary]] + args)) return cast( bytes, diff --git a/metaflow_extensions/netflix_ext/plugins/conda/utils.py b/metaflow_extensions/netflix_ext/plugins/conda/utils.py index 0d3375f..f42c161 100644 --- a/metaflow_extensions/netflix_ext/plugins/conda/utils.py +++ b/metaflow_extensions/netflix_ext/plugins/conda/utils.py @@ -213,6 +213,10 @@ def get_sys_packages( result = { k: v for k, v in result.items() if k in CONDA_SYS_DEPENDENCIES and v is not None } + if "__cuda" in result and not result["__cuda"].endswith("=0"): + # Make things consistent so users can specify 12.2 and it is like 12.2=0 + # (otherwise it sometimes causes re-resolution) + result["__cuda"] = result["__cuda"] + "=0" return result From 19096c7f775f98b8f61bfbb85d15d1b822dee8e4 Mon Sep 17 00:00:00 2001 From: Romain Cledat Date: Tue, 10 Dec 2024 01:29:25 -0800 Subject: [PATCH 4/5] Typos --- .../netflix_ext/cmd/environment/environment_cmd.py | 1 + metaflow_extensions/netflix_ext/plugins/conda/conda.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py b/metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py index b9a8528..fb4daac 100644 --- a/metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py +++ b/metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py @@ -21,6 +21,7 @@ from metaflow.metaflow_config import ( CONDA_ALL_ARCHS, CONDA_DEPENDENCY_RESOLVER, + CONDA_TEST, DEFAULT_DATASTORE, DEFAULT_METADATA, get_pinned_conda_libs, diff --git a/metaflow_extensions/netflix_ext/plugins/conda/conda.py b/metaflow_extensions/netflix_ext/plugins/conda/conda.py index 51760ed..0572542 100644 --- a/metaflow_extensions/netflix_ext/plugins/conda/conda.py +++ b/metaflow_extensions/netflix_ext/plugins/conda/conda.py @@ -2068,7 +2068,9 @@ def _validate_conda_installation(self) -> Optional[Exception]: self.is_non_conda_exec = True elif "mamba version" in self._info_no_lock: # Mamba 2.0+ has mamba version but no conda version - if parse_version(self._info_no_lock) < parse_version("2.0.0"): + if parse_version(self._info_no_lock["mamba version"]) < parse_version( + "2.0.0" + ): return InvalidEnvironmentException( self._install_message_for_resolver("mamba") ) From 654f10a7b06a624e559ec3634fa71e8271912857 Mon Sep 17 00:00:00 2001 From: Romain Cledat Date: Fri, 20 Dec 2024 00:06:13 -0800 Subject: [PATCH 5/5] More fixes from upstream --- .../cmd/debug/debug_stub_generator.py | 2 +- .../netflix_ext/plugins/conda/conda.py | 84 ++++++++++++++++--- .../netflix_ext/plugins/conda/env_descr.py | 2 +- .../netflix_ext/plugins/conda/utils.py | 2 +- 4 files changed, 74 insertions(+), 16 deletions(-) diff --git a/metaflow_extensions/netflix_ext/cmd/debug/debug_stub_generator.py b/metaflow_extensions/netflix_ext/cmd/debug/debug_stub_generator.py index ababa99..c0dadba 100644 --- a/metaflow_extensions/netflix_ext/cmd/debug/debug_stub_generator.py +++ b/metaflow_extensions/netflix_ext/cmd/debug/debug_stub_generator.py @@ -23,7 +23,7 @@ def __init__(self, task_pathspec): self.is_new_conda_step = self.is_new_conda_step() parameters_task = Step( - "%s/_parameters" % self.task.parent.pathspec, _namespace_check=False + "%s/_parameters" % self.task.parent.parent.pathspec, _namespace_check=False ).task self.workflow_dag = parameters_task["_graph_info"].data diff --git a/metaflow_extensions/netflix_ext/plugins/conda/conda.py b/metaflow_extensions/netflix_ext/plugins/conda/conda.py index 0572542..7249029 100644 --- a/metaflow_extensions/netflix_ext/plugins/conda/conda.py +++ b/metaflow_extensions/netflix_ext/plugins/conda/conda.py @@ -266,10 +266,13 @@ def call_conda( if self._bins is None or self._bins[binary] is None: raise InvalidEnvironmentException("Binary '%s' unknown" % binary) try: - env = {"CONDA_JSON": "True"} + env = {} + if addl_env: + env.update(addl_env) + self._envars_for_conda_exec(env) + initial_command = args[0] if args else None - if self._conda_executable_type == "mamba": - env.update({"MAMBA_NO_BANNER": "1", "MAMBA_JSON": "True"}) + if ( self._mode == "local" and CONDA_LOCAL_PATH @@ -278,7 +281,6 @@ def call_conda( # In this case, we need to prepend some options to the arguments # to ensure that it uses CONDA_LOCAL_PATH and not the system one args = [ - "--no-env", "--rc-file", os.path.join(CONDA_LOCAL_PATH, ".mambarc"), "-r", @@ -294,10 +296,10 @@ def call_conda( # This is in the remote case. args.extend(["-r", self.root_prefix, "--json"]) - if addl_env: - env.update(addl_env) - - debug.conda_exec("Conda call: %s" % str([self._bins[binary]] + args)) + debug.conda_exec( + "Conda call: %s (env: %s)" + % (str([self._bins[binary]] + args), str(env)) + ) return cast( bytes, subprocess.check_output( @@ -353,6 +355,7 @@ def call_binary( raise InvalidEnvironmentException("Binary '%s' unknown" % binary) if addl_env is None: addl_env = {} + self._envars_for_conda_exec(addl_env) try: debug.conda_exec("Binary call: %s" % str([binary] + args)) return subprocess.check_output( @@ -1835,8 +1838,18 @@ def _ensure_local_conda(self): os.path.join(CONDA_LOCAL_PATH, "..", ".conda-install.lock") ), ): + # This check here is in case someone else grabbed the lock first + # and successfully installed the environment if self._validate_conda_installation(): self._install_local_conda() + + # This last check is actually just to set the + # self.is_non_conda_exec flag correctly. In the case of local + # installed conda, the call itself is very fast so it should + # not be significant to call it three times. + err = self._validate_conda_installation() + if err: + raise err else: self._bins = { self._conda_executable_type: which(self._conda_executable_type), @@ -1950,9 +1963,9 @@ def _install_remote_conda(self): # inside the current directory parent_dir = os.path.dirname(os.getcwd()) if os.access(parent_dir, os.W_OK): - final_path = os.path.join(parent_dir, "conda_env", "__conda_installer") + final_path = os.path.join(parent_dir, "conda_env", "micromamba") else: - final_path = os.path.join(os.getcwd(), "conda_env", "__conda_installer") + final_path = os.path.join(os.getcwd(), "conda_env", "micromamba") if os.path.isfile(final_path): os.unlink(final_path) @@ -2003,6 +2016,15 @@ def _validate_conda_installation(self) -> Optional[Exception]: return InvalidEnvironmentException( "Missing special marker .metaflow-local-env in locally installed environment" ) + # We need to check if we need to set is_non_conda_exec + # Note that mamba < 2.0 has mamba_version in the info and not mamba version + self.is_non_conda_exec = (self._conda_executable_type == "micromamba") or ( + self._conda_executable_type == "mamba" + and "mamba version" in self._info_no_lock + ) + if self.is_non_conda_exec: + # Clear out the info so we can properly re-compute it next time + self._cached_info = None # We consider that locally installed environments are OK return None @@ -2083,7 +2105,8 @@ def _validate_conda_installation(self) -> Optional[Exception]: return InvalidEnvironmentException( self._install_message_for_resolver(self._conda_executable_type) ) - # TODO: We should check mamba version too + if self.is_non_conda_exec: + self._cached_info = None # TODO: Re-add support for micromamba server when it is actually out # Even if we have conda/mamba as the dependency solver, we can also possibly @@ -2308,6 +2331,7 @@ def _info_no_lock(self) -> Dict[str, Any]: self._cached_info["root_prefix"] = self._cached_info["base environment"] self._cached_info["envs_dirs"] = self._cached_info["envs directories"] self._cached_info["pkgs_dirs"] = self._cached_info["package cache"] + debug.conda_exec("Got info: %s" % str(self._cached_info)) return self._cached_info @@ -2406,9 +2430,10 @@ def _create(self, env: ResolvedEnvironment, env_name: str) -> str: % (p.filename, cache_info.url) ) explicit_urls.append( - "%s#%s\n" + "%s#%s%s\n" % ( self._make_urlstxt_from_cacheurl(cache_info.url), + "md5=" if self.is_non_conda_exec else "", cache_info.hash, ) ) @@ -2460,7 +2485,9 @@ def _create(self, env: ResolvedEnvironment, env_name: str) -> str: # any file and letting things get compiled lazily. This may have the # added benefit of a faster environment creation. # This works with Micromamba and Mamba 2.0+. - args.append("--no-pyc") + args.extend( + ["--no-pyc", "--safety-checks=disabled", "--no-extra-safety-checks"] + ) args.extend( [ "--name", @@ -2496,6 +2523,12 @@ def _create(self, env: ResolvedEnvironment, env_name: str) -> str: "install", "--no-deps", "--no-input", + "--no-index", + "--no-user", + "--no-warn-script-location", + "--no-cache-dir", + "--root-user-action=ignore", + "--disable-pip-version-check", ] if self.is_non_conda_exec: # Be consistent with what we install with micromamba @@ -2645,6 +2678,31 @@ def _install_message_for_resolver(resolver: str) -> str: else: return "Unknown resolver '%s'" % resolver + def _envars_for_conda_exec(self, env_vars: Dict[str, str]): + def _update_if_not_present(d: Dict[str, str]): + for k, v in d.items(): + if k not in env_vars: + env_vars[k] = v + + # When running non-conda (mamba 2.0+ or micromamba) with CONDA_LOCAL_PATH, we + # need to set a few more environment variables to lock it down. Even in other + # cases, we also need to set certain environment variables + env_vars.update( + {"CONDA_JSON": "True", "MAMBA_JSON": "True", "MAMBA_NO_BANNER": "1"} + ) + if CONDA_LOCAL_PATH and self._mode == "local": + _update_if_not_present({"MAMBA_ROOT_PREFIX": CONDA_LOCAL_PATH}) + if self.is_non_conda_exec: + _update_if_not_present( + { + "CONDA_PKGS_DIRS": os.path.join(CONDA_LOCAL_PATH, "pkgs"), + "CONDA_ENVS_DIRS": os.path.join(CONDA_LOCAL_PATH, "envs"), + } + ) + # Transfer to MAMBA prefix + if self.is_non_conda_exec and "CONDA_CHANNEL_PRIORITY" in env_vars: + env_vars["MAMBA_CHANNEL_PRIORITY"] = env_vars["CONDA_CHANNEL_PRIORITY"] + def _start_micromamba_server(self): if not self._have_micromamba_server: return diff --git a/metaflow_extensions/netflix_ext/plugins/conda/env_descr.py b/metaflow_extensions/netflix_ext/plugins/conda/env_descr.py index 5cd5d88..0793348 100644 --- a/metaflow_extensions/netflix_ext/plugins/conda/env_descr.py +++ b/metaflow_extensions/netflix_ext/plugins/conda/env_descr.py @@ -760,7 +760,7 @@ def __init__( env_full_id = self._compute_hash( [ "%s#%s" % (p.filename, p.pkg_hash(p.url_format)) - for p in all_packages + for p in sorted(all_packages, key=lambda p: p.filename) ] + [arch or arch_id()] ) diff --git a/metaflow_extensions/netflix_ext/plugins/conda/utils.py b/metaflow_extensions/netflix_ext/plugins/conda/utils.py index f42c161..b61e2a5 100644 --- a/metaflow_extensions/netflix_ext/plugins/conda/utils.py +++ b/metaflow_extensions/netflix_ext/plugins/conda/utils.py @@ -101,7 +101,7 @@ class AliasType(Enum): CONDA_FORMATS = _ALL_CONDA_FORMATS # type: Tuple[str, ...] FAKEURL_PATHCOMPONENT = "_fake" -_double_equal_match = re.compile("==(?=[<=>!~])") +_double_equal_match = re.compile(r"==(?=[<=>!~])") class CondaException(MetaflowException):