From 726227f596ad017c95c65efb5f47293b28c8c6ea Mon Sep 17 00:00:00 2001 From: Brandon Lotero Date: Mon, 29 Apr 2024 23:41:10 -0500 Subject: [PATCH] core\feat: #26 crowd seg generator - further definitions on crowd seg dataset generator --- core/seg_tgce/data/crowd_seg/generator.py | 139 +++++++++++++++++++++ core/seg_tgce/data/crowd_seg/map.py | 8 -- core/seg_tgce/data/crowd_seg/visualizer.py | 33 +++-- core/seg_tgce/py.typed | 0 4 files changed, 159 insertions(+), 21 deletions(-) create mode 100644 core/seg_tgce/data/crowd_seg/generator.py create mode 100644 core/seg_tgce/py.typed diff --git a/core/seg_tgce/data/crowd_seg/generator.py b/core/seg_tgce/data/crowd_seg/generator.py new file mode 100644 index 0000000..587f359 --- /dev/null +++ b/core/seg_tgce/data/crowd_seg/generator.py @@ -0,0 +1,139 @@ +import logging +import os +from typing import List + +import numpy as np +from keras.preprocessing.image import img_to_array, load_img +from keras.utils import Sequence +from matplotlib import pyplot as plt + +LOGGER = logging.getLogger(__name__) + + +class ImageDataGenerator(Sequence): # pylint: disable=too-many-instance-attributes + def __init__( # pylint: disable=too-many-arguments + self, + image_dir, + mask_dir, + n_classes, + image_size=(256, 256), + batch_size=32, + shuffle=True, + ): + self.image_dir = image_dir + self.mask_dir = mask_dir + self.image_size = image_size + self.batch_size = batch_size + self.shuffle = shuffle + self.image_filenames = sorted( + [ + filename + for filename in os.listdir(image_dir) + if filename.endswith(".png") + ] + ) + self.n_scorers = len(os.listdir(mask_dir)) + self.scorers_tags = sorted(os.listdir(mask_dir)) + print(f"Scorer tags: {self.scorers_tags}") + self.n_classes = n_classes + self.on_epoch_end() + + def __len__(self): + return int(np.ceil(len(self.image_filenames) / self.batch_size)) + + def __getitem__(self, index): + batch_filenames = self.image_filenames[ + index * self.batch_size : (index + 1) * self.batch_size + ] + images, masks = self.__data_generation(batch_filenames) + return images, masks + + def on_epoch_end(self): + if self.shuffle: + np.random.shuffle(self.image_filenames) + + def visualize_sample( + self, + scorers: List[str], + batch_index=1, + sample_index=1, + ): + images, masks = self[batch_index] + + fig, axes = plt.subplots(len(scorers), self.n_classes + 1) + for scorer_num, scorer in enumerate(scorers): + for class_num in range(self.n_classes): + axes[scorer_num][0].imshow(images[sample_index].astype(int)) + axes[scorer_num][class_num + 1].imshow( + masks[sample_index, scorer_num, class_num] + ) + axes[scorer_num][0].axis("off") + axes[scorer_num][class_num + 1].axis("off") + axes[scorer_num][0].set_title(f"Image (ann {scorer})") + axes[scorer_num][class_num + 1].set_title(f"Class {class_num}") + + plt.show() + return fig + + def __data_generation(self, batch_filenames): + images = np.empty((self.batch_size, *self.image_size, 3)) + masks = np.empty( + ( + self.batch_size, + self.n_scorers, + self.n_classes, + *self.image_size, + ) + ) + + for batch, filename in enumerate(batch_filenames): + img_path = os.path.join(self.image_dir, filename) + for scorer, scorer_dir in enumerate(self.scorers_tags): + scorer_mask_dir = os.path.join(self.mask_dir, scorer_dir) + mask_path = os.path.join(scorer_mask_dir, filename) + if os.path.exists(mask_path): + mask_raw = load_img( + mask_path, + color_mode="grayscale", + target_size=self.image_size, + ) + mask = img_to_array(mask_raw) + for class_num in range(self.n_classes): + masks[batch][scorer][class_num] = np.where( + mask == class_num, 1, 0 + ).reshape(*self.image_size) + plt.show() + else: + LOGGER.warning( + ( + "Mask not found for scorer %s and image %s " + "Filling up with zeros." + ), + scorer_dir, + filename, + ) + masks[batch, scorer] = np.zeros((self.n_classes, *self.image_size)) + + image = load_img(img_path, target_size=self.image_size) + image = img_to_array(image) + + images[batch] = image + + return images, masks + + +if __name__ == "__main__": + val_gen = ImageDataGenerator( + image_dir="/home/brandon/unal/maestria/datasets/Histology Data/patches/Val", + mask_dir="/home/brandon/unal/maestria/datasets/Histology Data/masks/Val", + batch_size=16, + n_classes=6, + ) + print(f"Train len: {len(val_gen)}") + print(f"Train masks scorers: {val_gen.n_scorers}") + print(f"Train masks scorers tags: {val_gen.scorers_tags}") + val_gen.visualize_sample( + batch_index=8, + sample_index=8, + scorers=["NP8", "NP16", "NP21", "expert"], + ) diff --git a/core/seg_tgce/data/crowd_seg/map.py b/core/seg_tgce/data/crowd_seg/map.py index 093a758..be2e894 100644 --- a/core/seg_tgce/data/crowd_seg/map.py +++ b/core/seg_tgce/data/crowd_seg/map.py @@ -18,11 +18,3 @@ def find_annotators_alias(data_target: DataTarget, base_dir_path: Path) -> list[ path.name for path in data_target_path.iterdir() if path.is_dir() ] return annotators_alias - - -def produce_tf_dataset( - data_target: DataTarget, base_dir_path: Path, annotator_alias: str -) -> Path: - data_target_path = base_dir_path / MASKS_SUB_DIR / data_target.value - tf_image_folder = data_target_path / annotator_alias - return tf_image_folder diff --git a/core/seg_tgce/data/crowd_seg/visualizer.py b/core/seg_tgce/data/crowd_seg/visualizer.py index dee93aa..54f055d 100644 --- a/core/seg_tgce/data/crowd_seg/visualizer.py +++ b/core/seg_tgce/data/crowd_seg/visualizer.py @@ -1,14 +1,15 @@ import os -from typing import List, Tuple +from typing import Tuple + import matplotlib.pyplot as plt -import cv2 +from cv2 import imread # pylint: disable=no-name-in-module class BaseDirectoryNotFoundError(Exception): pass -def visualize_data( +def visualize_data( # pylint: disable=too-many-locals x_ini_values: Tuple[int, ...], y_ini_values: Tuple[int, ...], labelers: Tuple[str, str], @@ -30,14 +31,20 @@ def visualize_data( img_path = ( f"{base_path}/patches/Train/core_A0AL_AN_x_ini_{x_ini}_y_ini_{y_ini}.png" ) - non_expert_mask_path = f"{base_path}/masks/Train/{labelers[0]}/core_A0AL_AN_x_ini_{x_ini}_y_ini_{y_ini}.png" - expert_mask_path = f"{base_path}/masks/Train/{labelers[1]}/core_A0AL_AN_x_ini_{x_ini}_y_ini_{y_ini}.png" + non_expert_mask_path = ( + f"{base_path}/masks/Train/{labelers[0]}/" + f"core_A0AL_AN_x_ini_{x_ini}_y_ini_{y_ini}.png" + ) + expert_mask_path = ( + f"{base_path}/masks/Train/{labelers[1]}/" + f"core_A0AL_AN_x_ini_{x_ini}_y_ini_{y_ini}.png" + ) - im = cv2.imread(img_path) - non_expert_mask = cv2.imread(non_expert_mask_path, -1) - expert_mask = cv2.imread(expert_mask_path, -1) + img = imread(img_path) + non_expert_mask = imread(non_expert_mask_path, -1) + expert_mask = imread(expert_mask_path, -1) - axes[i, 0].imshow(im) + axes[i, 0].imshow(img) axes[i, 0].axis("off") axes[i, 1].imshow(non_expert_mask, cmap="Pastel1") axes[i, 1].axis("off") @@ -56,12 +63,12 @@ def visualize_data( x_ini_values = 1074, 1432, 2148 y_ini_values = 1074, 1432, 2148 labelers = "NP1", "expert" - base_path = "../../../datasets/Histology Data" - save_path = "../docs/source/resources/crowd-seg-example-instances.png" + BASE_PATH = "../../../datasets/Histology Data" + SAVE_PATH = "../docs/source/resources/crowd-seg-example-instances.png" visualize_data( x_ini_values=x_ini_values, y_ini_values=y_ini_values, labelers=labelers, - base_path=base_path, - save_path=save_path, + base_path=BASE_PATH, + save_path=SAVE_PATH, ) diff --git a/core/seg_tgce/py.typed b/core/seg_tgce/py.typed new file mode 100644 index 0000000..e69de29