diff --git a/conda-store-server/conda_store_server/_internal/alembic/versions/89637f546129_remove_conda_package_build_channel.py b/conda-store-server/conda_store_server/_internal/alembic/versions/89637f546129_remove_conda_package_build_channel.py
deleted file mode 100644
index cfecde382..000000000
--- a/conda-store-server/conda_store_server/_internal/alembic/versions/89637f546129_remove_conda_package_build_channel.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# Copyright (c) conda-store development team. All rights reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
-"""remove conda package build channel
-
-Revision ID: 89637f546129
-Revises: bf065abf375b
-Create Date: 2024-12-04 13:09:25.562450
-
-"""
-from alembic import op
-from sqlalchemy import Column, INTEGER, String, ForeignKey, table, select
-
-
-# revision identifiers, used by Alembic.
-revision = '89637f546129'
-down_revision = 'bf065abf375b'
-branch_labels = None
-depends_on = None
-
-# Due to the issue fixed in https://github.com/conda-incubator/conda-store/pull/961
-# many conda_package_build entries have the wrong package entry (but the right channel).
-# Because the packages are duplicated, we can not recreate the _conda_package_build_uc
-# constraint without the channel_id.
-# So, this function will go thru each conda_package_build and re-associate it with the
-# correct conda_package based on the channel id.
-def fix_misrepresented_packages(conn):
-    # conda_packages is a hash of channel-id_name_version to conda_package id
-    conda_packages = {}
-
-    # dummy tables to run queries against
-    conda_package_build_table = table(
-        "conda_package_build",
-        Column("id", INTEGER),
-        Column("channel_id", INTEGER),
-        Column("package_id", INTEGER, ForeignKey("conda_package.id")),
-    )
-    conda_package_table = table(
-        "conda_package",
-        Column("id", INTEGER),
-        Column("channel_id", INTEGER),
-        Column("name", String),
-        Column("version", String),
-    )
-
-    def get_conda_package_id(conn, channel_id, name, version):
-        hashed_name = f"{channel_id}-{name}-{version}"
-
-        # if package exists in the conda_packages dict, return it
-        if hashed_name in conda_packages:
-            return conda_packages[hashed_name]
-
-        # if not, then query the db for the package
-        package = conn.execute(
-            select(conda_package_table).where(
-                conda_package_table.c.channel_id == channel_id,
-                conda_package_table.c.name == name,
-                conda_package_table.c.version == version,
-            )
-        ).first()
-
-        # save the package into the conda_packages dict
-        conda_packages[hashed_name] = package.id
-        return package.id
-
-    for row in conn.execute(
-        select(
-            conda_package_build_table.c.id,
-            conda_package_build_table.c.package_id,
-            conda_package_build_table.c.channel_id,
-            conda_package_table.c.name,
-            conda_package_table.c.version
-        ).join(
-            conda_package_build_table,
-            conda_package_build_table.c.package_id == conda_package_table.c.id
-        )
-    ):
-        # the channel_id might already be empty
-        if row[2] is None:
-            continue
-
-        package_id = get_conda_package_id(conn, row[2], row[3], row[4])
-        # if found package id does not match the found package id, we'll need to updated it
-        if package_id != row[1]:
-            update_package_query = conda_package_build_table.update().where(
-                conda_package_build_table.c.id == op.inline_literal(row[0])
-            ).values(
-                {"package_id": op.inline_literal(package_id)}
-            )
-            conn.execute(update_package_query)
-            conn.commit()
-
-def upgrade():
-    bind = op.get_bind()
-
-    # So, go thru each conda_package_build and re-associate it with the correct conda_package
-    # based on the channel id.
-    fix_misrepresented_packages(bind)
-
-    with op.batch_alter_table("conda_package_build") as batch_op:
-        # remove channel column from constraints
-        batch_op.drop_constraint(
-            "_conda_package_build_uc",
-        )
-
-        # re-add the constraint without the channel column
-        batch_op.create_unique_constraint(
-            "_conda_package_build_uc",
-            [
-                "package_id",
-                "subdir",
-                "build",
-                "build_number",
-                "sha256",
-            ],
-        )
-
-        # remove channel column
-        batch_op.drop_column(
-            "channel_id",
-        )
-
-
-def downgrade():
-    with op.batch_alter_table("conda_package_build") as batch_op:
-        # remove channel column from constraints
-        batch_op.drop_constraint(
-            constraint_name="_conda_package_build_uc",
-        )
-
-        # add channel column
-        batch_op.add_column(
-            Column("channel_id", INTEGER)
-        )
-
-        batch_op.create_foreign_key("fk_channel_id", "conda_channel", ["channel_id"], ["id"])
-
-        # re-add the constraint with the channel column
-        batch_op.create_unique_constraint(
-            constraint_name="_conda_package_build_uc",
-            columns=[
-                "channel_id",
-                "package_id",
-                "subdir",
-                "build",
-                "build_number",
-                "sha256",
-            ],
-        )
diff --git a/conda-store-server/conda_store_server/_internal/orm.py b/conda-store-server/conda_store_server/_internal/orm.py
index 32829fe0a..36f7bd051 100644
--- a/conda-store-server/conda_store_server/_internal/orm.py
+++ b/conda-store-server/conda_store_server/_internal/orm.py
@@ -758,6 +758,7 @@ class CondaPackageBuild(Base):
 
     __table_args__ = (
         UniqueConstraint(
+            "channel_id",
             "package_id",
             "subdir",
             "build",
@@ -772,6 +773,14 @@ class CondaPackageBuild(Base):
     package_id: Mapped[int] = mapped_column(ForeignKey("conda_package.id"))
     package: Mapped["CondaPackage"] = relationship(back_populates="builds")
 
+    """
+    Some package builds have the exact same data from different channels.
+    Thus, when adding a channel, populating CondaPackageBuild can encounter
+    duplicate keys errors. That's why we need to distinguish them by channel_id.
+    """
+    channel_id: Mapped[int] = mapped_column(ForeignKey("conda_channel.id"))
+    channel: Mapped["CondaChannel"] = relationship(CondaChannel)
+
     build: Mapped[str] = mapped_column(Unicode(64), index=True)
     build_number: Mapped[int]
     constrains: Mapped[dict] = mapped_column(JSON)
diff --git a/conda-store-server/tests/_internal/alembic/version/test_89637f546129_remove_conda_package_build_channel.py b/conda-store-server/tests/_internal/alembic/version/test_89637f546129_remove_conda_package_build_channel.py
deleted file mode 100644
index 35c8d7b8e..000000000
--- a/conda-store-server/tests/_internal/alembic/version/test_89637f546129_remove_conda_package_build_channel.py
+++ /dev/null
@@ -1,203 +0,0 @@
-# Copyright (c) conda-store development team. All rights reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
-import pytest
-from sqlalchemy import text
-from sqlalchemy.exc import OperationalError
-
-from conda_store_server import api
-from conda_store_server._internal import orm
-
-
-def setup_bad_data_db(conda_store):
-    """A database fixture populated with
-    * 2 channels
-    * 2 conda packages
-    * 5 conda package builds
-    """
-    with conda_store.session_factory() as db:
-        # create test channels
-        api.create_conda_channel(db, "test-channel-1")
-        api.create_conda_channel(db, "test-channel-2")
-        db.commit()
-
-        # create some sample conda_package's
-        # For simplicity, the channel_id's match the id of the conda_package.
-        # So, when checking that the package build entries have been reassembled
-        # the right way, check that the package_id in the conda_package_build is
-        # equal to what would have been the channel_id (before the migration is run)
-        conda_package_records = [
-            {
-                "id": 1,
-                "channel_id": 1,
-                "name": "test-package-1",
-                "version": "1.0.0",
-            },
-            {
-                "id": 2,
-                "channel_id": 2,
-                "name": "test-package-1",
-                "version": "1.0.0",
-            },
-        ]
-        for cpb in conda_package_records:
-            conda_package = orm.CondaPackage(**cpb)
-            db.add(conda_package)
-        db.commit()
-
-        # create some conda_package_build's
-        conda_package_builds = [
-            {
-                "id": 1,
-                "build": "py310h06a4308_0",
-                "package_id": 1,
-                "build_number": 0,
-                "sha256": "one",
-                "subdir": "linux-64",
-            },
-            {
-                "id": 2,
-                "build": "py311h06a4308_0",
-                "package_id": 1,
-                "build_number": 0,
-                "sha256": "two",
-                "subdir": "linux-64",
-            },
-            {
-                "id": 3,
-                "build": "py38h06a4308_0",
-                "package_id": 1,
-                "build_number": 0,
-                "sha256": "three",
-                "subdir": "linux-64",
-            },
-            {
-                "id": 4,
-                "build": "py39h06a4308_0",
-                "package_id": 2,
-                "build_number": 0,
-                "sha256": "four",
-                "subdir": "linux-64",
-            },
-            {
-                "id": 5,
-                "build": "py310h06a4308_0",
-                "package_id": 2,
-                "build_number": 0,
-                "sha256": "five",
-                "subdir": "linux-64",
-            },
-        ]
-        default_values = {
-            "depends": "",
-            "md5": "",
-            "timestamp": 0,
-            "constrains": "",
-            "size": 0,
-        }
-        for cpb in conda_package_builds:
-            conda_package = orm.CondaPackageBuild(**cpb, **default_values)
-            db.add(conda_package)
-        db.commit()
-
-        # force in some channel data
-        # conda_package_build 1 should have package_id 2 after migration
-        db.execute(text("UPDATE conda_package_build SET channel_id=2 WHERE id=1"))
-        # conda_package_build 2 should have package_id 1 after migration
-        db.execute(text("UPDATE conda_package_build SET channel_id=1 WHERE id=2"))
-        # conda_package_build 3 should have package_id 1 after migration
-        db.execute(text("UPDATE conda_package_build SET channel_id=1 WHERE id=3"))
-        # conda_package_build 4 should have package_id 2 after migration
-        db.execute(text("UPDATE conda_package_build SET channel_id=2 WHERE id=4"))
-
-        # don't set conda_package_build 5 channel_id as a test case
-        # conda_package_build 5 package_id should be unchanged (2) after migration
-
-        db.commit()
-
-
-def test_remove_conda_package_build_channel_basic(
-    conda_store, alembic_config, alembic_runner
-):
-    """Simply run the upgrade and downgrade for this migration"""
-    # migrate all the way to the target revision
-    alembic_runner.migrate_up_to("89637f546129")
-
-    # try downgrading
-    alembic_runner.migrate_down_one()
-
-    # ensure the channel_id column exists, will error if channel_id column does not exist
-    with conda_store.session_factory() as db:
-        db.execute(text("SELECT channel_id from conda_package_build"))
-
-    # try upgrading once more
-    alembic_runner.migrate_up_one()
-
-    # ensure the channel_id column exists, will error if channel_id column does not exist
-    with conda_store.session_factory() as db:
-        with pytest.raises(OperationalError):
-            db.execute(text("SELECT channel_id from conda_package_build"))
-
-
-def test_remove_conda_package_build_bad_data(
-    conda_store, alembic_config, alembic_runner
-):
-    """Simply run the upgrade and downgrade for this migration"""
-    # migrate all the way to the target revision
-    alembic_runner.migrate_up_to("89637f546129")
-
-    # try downgrading
-    alembic_runner.migrate_down_one()
-
-    # ensure the channel_id column exists, will error if channel_id column does not exist
-    with conda_store.session_factory() as db:
-        db.execute(text("SELECT channel_id from conda_package_build"))
-
-    # seed db with data that has broken data
-    setup_bad_data_db(conda_store)
-
-    # try upgrading once more
-    alembic_runner.migrate_up_one()
-
-    # ensure the channel_id column exists, will error if channel_id column does not exist
-    with conda_store.session_factory() as db:
-        with pytest.raises(OperationalError):
-            db.execute(text("SELECT channel_id from conda_package_build"))
-
-    # ensure all packages builds have the right package associated
-    with conda_store.session_factory() as db:
-        build = (
-            db.query(orm.CondaPackageBuild)
-            .filter(orm.CondaPackageBuild.id == 1)
-            .first()
-        )
-        assert build.package_id == 2
-
-        build = (
-            db.query(orm.CondaPackageBuild)
-            .filter(orm.CondaPackageBuild.id == 2)
-            .first()
-        )
-        assert build.package_id == 1
-
-        build = (
-            db.query(orm.CondaPackageBuild)
-            .filter(orm.CondaPackageBuild.id == 3)
-            .first()
-        )
-        assert build.package_id == 1
-
-        build = (
-            db.query(orm.CondaPackageBuild)
-            .filter(orm.CondaPackageBuild.id == 4)
-            .first()
-        )
-        assert build.package_id == 2
-
-        build = (
-            db.query(orm.CondaPackageBuild)
-            .filter(orm.CondaPackageBuild.id == 5)
-            .first()
-        )
-        assert build.package_id == 2
diff --git a/conda-store-server/tests/_internal/test_orm.py b/conda-store-server/tests/_internal/test_orm.py
index ac2e45a73..d35601be2 100644
--- a/conda-store-server/tests/_internal/test_orm.py
+++ b/conda-store-server/tests/_internal/test_orm.py
@@ -53,6 +53,7 @@ def populated_db(db):
             1,
             {
                 "build": "py310h06a4308_0",
+                "channel_id": 1,
                 "build_number": 0,
                 "sha256": "11f080b53b36c056dbd86ccd6dc56c40e3e70359f64279b1658bb69f91ae726f",
                 "subdir": "linux-64",
@@ -67,6 +68,7 @@ def populated_db(db):
             1,
             {
                 "build": "py311h06a4308_0",
+                "channel_id": 1,
                 "build_number": 0,
                 "sha256": "f0719ee6940402a1ea586921acfaf752fda977dbbba74407856a71ba7f6c4e4a",
                 "subdir": "linux-64",
@@ -81,6 +83,7 @@ def populated_db(db):
             1,
             {
                 "build": "py38h06a4308_0",
+                "channel_id": 1,
                 "build_number": 0,
                 "sha256": "39e39a23baebd0598c1b59ae0e82b5ffd6a3230325da4c331231d55cbcf13b3e",
                 "subdir": "linux-64",
@@ -217,13 +220,14 @@ def test_update_packages_add_existing_pkg_new_version(
     assert count == 2
     count = populated_db.query(orm.CondaPackage).count()
     assert count == 3
-    num_builds = (
+    builds = (
         populated_db.query(orm.CondaPackageBuild)
-        .join(orm.CondaPackage)
-        .filter(orm.CondaPackage.channel_id == 1)
-        .count()
+        .filter(orm.CondaPackageBuild.channel_id == 1)
+        .all()
     )
-    assert num_builds == 4
+    assert len(builds) == 4
+    for b in builds:
+        assert b.package.channel_id == 1
     count = populated_db.query(orm.CondaPackageBuild).count()
     assert count == 4
 
@@ -291,13 +295,14 @@ def test_update_packages_multiple_subdirs(mock_repdata, populated_db):
         .count()
     )
     assert count == 2
-    num_builds = (
+    builds = (
         populated_db.query(orm.CondaPackageBuild)
-        .join(orm.CondaPackage)
-        .filter(orm.CondaPackage.channel_id == 1)
-        .count()
+        .filter(orm.CondaPackageBuild.channel_id == 1)
+        .all()
     )
-    assert num_builds == 5
+    assert len(builds) == 5
+    for b in builds:
+        assert b.package.channel_id == 1
 
 
 @mock.patch("conda_store_server._internal.conda_utils.download_repodata")
@@ -321,15 +326,13 @@ def check_packages():
         assert len(conda_packages) == 1
         conda_packages = (
             populated_db.query(orm.CondaPackageBuild)
-            .join(orm.CondaPackage)
-            .filter(orm.CondaPackage.channel_id == 1)
+            .filter(orm.CondaPackageBuild.channel_id == 1)
             .all()
         )
         assert len(conda_packages) == 4
         conda_packages = (
             populated_db.query(orm.CondaPackageBuild)
-            .join(orm.CondaPackage)
-            .filter(orm.CondaPackage.channel_id == 2)
+            .filter(orm.CondaPackageBuild.channel_id == 2)
             .all()
         )
         assert len(conda_packages) == 0
@@ -368,13 +371,14 @@ def test_update_packages_new_package_channel(mock_repdata, populated_db, test_re
         .count()
     )
     assert count == 2
-    num_builds = (
+    builds = (
         populated_db.query(orm.CondaPackageBuild)
-        .join(orm.CondaPackage)
-        .filter(orm.CondaPackage.channel_id == 2)
-        .count()
+        .filter(orm.CondaPackageBuild.channel_id == 2)
+        .all()
     )
-    assert num_builds == 1
+    assert len(builds) == 1
+    for b in builds:
+        assert b.package.channel_id == 2
     count = populated_db.query(orm.CondaPackageBuild).count()
     assert count == 4
 
@@ -401,10 +405,49 @@ def test_update_packages_multiple_builds(
     # ensure it is added to conda package builds
     count = populated_db.query(orm.CondaPackageBuild).count()
     assert count == 5
-    num_builds = (
+    builds = (
         populated_db.query(orm.CondaPackageBuild)
-        .join(orm.CondaPackage)
-        .filter(orm.CondaPackage.channel_id == 2)
-        .count()
+        .filter(orm.CondaPackageBuild.channel_id == 2)
+        .all()
+    )
+    assert len(builds) == 2
+    for b in builds:
+        assert b.package.channel_id == 2
+
+
+@mock.patch("conda_store_server._internal.conda_utils.download_repodata")
+def test_update_packages_channel_consistency(
+    mock_repdata, populated_db, test_repodata_multiple_packages
+):
+    mock_repdata.return_value = test_repodata_multiple_packages
+
+    channel = (
+        populated_db.query(orm.CondaChannel).filter(orm.CondaChannel.id == 2).first()
+    )
+    channel.update_packages(populated_db, "linux-64")
+
+    # ensure the package builds end up with the correct channel
+    builds = (
+        populated_db.query(orm.CondaPackageBuild)
+        .filter(orm.CondaPackageBuild.channel_id == 2)
+        .all()
+    )
+    for b in builds:
+        assert b.channel_id == 2
+        assert b.package.channel_id == 2
+
+    # again with another channel
+    channel = (
+        populated_db.query(orm.CondaChannel).filter(orm.CondaChannel.id == 1).first()
+    )
+    channel.update_packages(populated_db, "linux-64")
+
+    # ensure the package builds end up with the correct channel
+    builds = (
+        populated_db.query(orm.CondaPackageBuild)
+        .filter(orm.CondaPackageBuild.channel_id == 1)
+        .all()
     )
-    assert num_builds == 2
+    for b in builds:
+        assert b.channel_id == 1
+        assert b.package.channel_id == 1