core\feat: #45 remove non pypi deps
- remove dependency on GitHub package `python-gcpds.image_segmentation`
  to enable PyPI publishing
- locally implement Oxford-IIIT Pet dataset mapper
blotero committed May 14, 2024
1 parent c26a263 commit 825b462
Showing 6 changed files with 109 additions and 281 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -5,4 +5,5 @@
 node_modules
 docs/build
 **/Seed-Detection-2-1
-notebooks/**/.ipynb_checkpoints
+notebooks/**/.ipynb_checkpoints
+core/dist
281 changes: 12 additions & 269 deletions core/poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion core/pyproject.toml
@@ -19,8 +19,8 @@ keras = "2.15.0"
tensorflow = "2.15.1"
matplotlib = "^3.8.4"
opencv-python = "^4.9.0.80"
gcpds-image-segmentation = { git = "https://github.com/UN-GCPDS/python-gcpds.image_segmentation" }
tensorflow-datasets = "^4.9.4"
gdown = "^5.2.0"


[tool.poetry.group.test.dependencies]
83 changes: 83 additions & 0 deletions core/seg_tgce/data/oxford_pet/oxford_iiit_pet.py
@@ -0,0 +1,83 @@
# Inspired from https://github.com/UN-GCPDS/python-gcpds.image_segmentation
# Original license: BSD-2-Clause

from functools import cache
from typing import List, Tuple

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import Tensor


class OxfordIiitPet:
    def __init__(
        self,
        split: Tuple[float, float, float] = (70.0, 15.0, 15.0),
        one_hot: bool = True,
    ):
        self.one_hot = one_hot
        self.split = OxfordIiitPet._get_splits(split)
        dataset, info = tfds.load(
            "oxford_iiit_pet:3.*.*", with_info=True, split=self.split
        )
        self.info = info
        train, val, test = dataset
        self.classes = 3
        train = train.map(self._keep_interface)
        val = val.map(self._keep_interface)
        test = test.map(self._keep_interface)

        self.mapped_dataset = train, val, test
        self.labels_info = {0: "cat", 1: "dog"}

    @cache  # pylint: disable=method-cache-max-size-none
    def load_instance_by_id(  # type: ignore
        self,
        id_img: str,
    ) -> Tuple[Tensor, Tensor, Tensor, str]:
        for dataset in self.mapped_dataset:
            dataset = dataset.filter(
                lambda img, mask, label, id_image: id_image == id_img
            )
            for x in dataset:
                return x

    @staticmethod
    def _get_splits(splits: Tuple[float, float, float]) -> List[str]:
        percentage_sum = 0.0
        splits_ = []
        for percentage in splits:
            percentage_sum += percentage
            splits_.append(f"train[{percentage_sum-percentage}%:{percentage_sum}%]")
        return splits_

    def to_one_hot(self, mask: Tensor) -> Tensor:
        one_hot = tf.one_hot(mask, self.classes)
        return tf.gather(one_hot, 0, axis=2)

    def _keep_interface(self, x: dict) -> Tuple[Tensor, Tensor, Tensor, str]:
        img = tf.cast(x["image"], tf.float32) / 255.0
        mask = x["segmentation_mask"] - 1
        mask = self.to_one_hot(mask) if self.one_hot else mask
        label = x["species"]
        id_image = x["file_name"]
        return img, mask, label, id_image

    def __call__(
        self,
    ):
        return self.mapped_dataset


if __name__ == "__main__":
    dataset = OxfordIiitPet()
    train_dataset, val_dataset, test_dataset = dataset()
    for img, mask, label, id_img in train_dataset.take(1):
        print(img.shape, mask.shape, label, id_img)
        break
    for img, mask, label, id_img in val_dataset.take(1):
        print(img.shape, mask.shape, label, id_img)
        break
    for img, mask, label, id_img in test_dataset.take(1):
        print(img.shape, mask.shape, label, id_img)
        break
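
As a quick sanity check of `_get_splits`, the running percentage sum expands the default split tuple into consecutive TFDS slice strings over the single train split (values computed directly from the code above):

# Worked example for OxfordIiitPet._get_splits:
print(OxfordIiitPet._get_splits((70.0, 15.0, 15.0)))
# ['train[0.0%:70.0%]', 'train[70.0%:85.0%]', 'train[85.0%:100.0%]']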
3 changes: 2 additions & 1 deletion core/seg_tgce/data/oxford_pet/oxford_pet.py
@@ -1,9 +1,10 @@
 import tensorflow as tf
-from gcpds.image_segmentation.datasets.segmentation import OxfordIiitPet
 from keras.models import Model
 
 from seg_tgce.data.utils import map_dataset_multiple_annotators
 
+from .oxford_iiit_pet import OxfordIiitPet
+
 MODEL_ORIGINAL_SHAPE = (256, 256)


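A minimal usage sketch (an assumption for illustration, not part of the commit: it presumes the package is importable as `seg_tgce` with the layout shown here); the wrapper now resolves from the local module instead of the removed Git dependency:

# Hypothetical downstream usage of the locally implemented wrapper.
from seg_tgce.data.oxford_pet.oxford_iiit_pet import OxfordIiitPet

ds = OxfordIiitPet(split=(70.0, 15.0, 15.0), one_hot=True)
train, val, test = ds()  # three mapped tf.data.Dataset objects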
18 changes: 9 additions & 9 deletions core/seg_tgce/data/utils.py
@@ -1,29 +1,29 @@
 from typing import Tuple
 
 import tensorflow as tf
-from gcpds.image_segmentation.datasets.segmentation import OxfordIiitPet
 from keras.models import Model
+from tensorflow import Tensor
 
 
 def disturb_mask(
     model: Model,
-    image: tf.Tensor,
+    image: Tensor,
     model_shape: Tuple[int, int],
     target_shape: Tuple[int, int],
-) -> tf.Tensor:
+) -> Tensor:
     return tf.image.resize(model(tf.image.resize(image, model_shape)), target_shape)
 
 
-def mix_channels(mask: tf.Tensor) -> tf.Tensor:
+def mix_channels(mask: Tensor) -> Tensor:
     return tf.stack([mask, 1 - mask], axis=-2)
 
 
 def add_noisy_annotators(
-    img: tf.Tensor,
-    models: list[tf.Tensor],
+    img: Tensor,
+    models: list[Tensor],
     model_shape: Tuple[int, int],
     target_shape: Tuple[int, int],
-) -> tf.Tensor:
+) -> Tensor:
     return tf.transpose(
         [
             disturb_mask(model, img, model_shape=model_shape, target_shape=target_shape)
@@ -34,12 +34,12 @@ def add_noisy_annotators(


 def map_dataset_multiple_annotators(
-    dataset: OxfordIiitPet,
+    dataset: Tensor,
     target_shape: tuple[int, int],
     model_shape: tuple[int, int],
     batch_size: int,
     disturbance_models: list[Model],
-) -> tf.Tensor:
+) -> Tensor:
     dataset_ = dataset.map(
         lambda img, mask, label, id_img: (img, mask),
         num_parallel_calls=tf.data.AUTOTUNE,
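As a rough, untested sketch of how the retyped helpers compose (the toy model, shapes, and import below are assumptions for illustration, not part of the commit):

# Stand-in "disturbance" model: a 1x1 conv emitting one sigmoid channel,
# so its output can play the role of a segmentation mask.
import tensorflow as tf
from keras import layers, models

from seg_tgce.data.utils import disturb_mask, mix_channels

toy_model = models.Sequential([layers.Conv2D(1, 1, activation="sigmoid")])

img = tf.random.uniform((1, 512, 512, 3))  # batched RGB input
mask = disturb_mask(
    toy_model,
    img,
    model_shape=(256, 256),   # resolution the model actually sees
    target_shape=(512, 512),  # prediction resized back to image size
)
print(mask.shape)  # (1, 512, 512, 1)

mixed = mix_channels(mask)  # stacks (mask, 1 - mask) along axis -2
print(mixed.shape)  # (1, 512, 512, 2, 1)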
