Skip to content

Commit

Permalink
core\feat: #26 crowd seg generator
Browse files Browse the repository at this point in the history
- further definitions on crowd seg dataset generator
  • Loading branch information
blotero committed Apr 30, 2024
1 parent 161edc5 commit 726227f
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 21 deletions.
139 changes: 139 additions & 0 deletions core/seg_tgce/data/crowd_seg/generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import logging
import os
from typing import List

import numpy as np
from keras.preprocessing.image import img_to_array, load_img
from keras.utils import Sequence
from matplotlib import pyplot as plt

LOGGER = logging.getLogger(__name__)


class ImageDataGenerator(Sequence):  # pylint: disable=too-many-instance-attributes
    """Keras ``Sequence`` that yields image batches with per-scorer masks.

    Images are the ``.png`` files found directly in ``image_dir``. Masks are
    looked up as ``mask_dir/<scorer>/<image filename>``, where every
    sub-directory of ``mask_dir`` is treated as one scorer (annotator).

    Each batch is a tuple ``(images, masks)`` where

    * ``images`` has shape ``(batch, *image_size, 3)``;
    * ``masks`` has shape ``(batch, n_scorers, n_classes, *image_size)`` and
      holds one binary plane per class (1 where the grayscale mask equals
      the class index, 0 elsewhere). Scorers that did not annotate an image
      get all-zero planes.

    The scorer axis follows the order of ``self.scorers_tags`` (sorted
    directory names).
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        image_dir,
        mask_dir,
        n_classes,
        image_size=(256, 256),
        batch_size=32,
        shuffle=True,
    ):
        """Index the image files and scorer directories.

        Args:
            image_dir: Directory containing the ``.png`` input images.
            mask_dir: Directory with one sub-directory of masks per scorer.
            n_classes: Number of classes encoded in the masks.
            image_size: Target size passed to ``load_img`` for both images
                and masks.
            batch_size: Maximum number of samples per batch.
            shuffle: Whether to shuffle the file order at construction time
                and at the end of every epoch.
        """
        super().__init__()
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.image_size = image_size
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.image_filenames = sorted(
            filename
            for filename in os.listdir(image_dir)
            if filename.endswith(".png")
        )
        # Only directories can hold masks; stray files (e.g. OS metadata)
        # must not be counted as scorers.
        self.scorers_tags = sorted(
            entry
            for entry in os.listdir(mask_dir)
            if os.path.isdir(os.path.join(mask_dir, entry))
        )
        self.n_scorers = len(self.scorers_tags)
        LOGGER.info("Scorer tags: %s", self.scorers_tags)
        self.n_classes = n_classes
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (last one may be short)."""
        return int(np.ceil(len(self.image_filenames) / self.batch_size))

    def __getitem__(self, index):
        """Load and return the ``(images, masks)`` batch at position ``index``."""
        start = index * self.batch_size
        batch_filenames = self.image_filenames[start : start + self.batch_size]
        return self.__data_generation(batch_filenames)

    def on_epoch_end(self):
        """Reshuffle the file order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.image_filenames)

    def visualize_sample(
        self,
        scorers: List[str],
        batch_index=1,
        sample_index=1,
    ):
        """Plot one sample next to the class masks of the requested scorers.

        One row is drawn per scorer: the image in the first column followed
        by one column per class.

        Args:
            scorers: Scorer tags to display; each must be in
                ``self.scorers_tags``.
            batch_index: Index of the batch to load.
            sample_index: Index of the sample inside that batch.

        Returns:
            The matplotlib figure that was shown.

        Raises:
            ValueError: If a requested scorer is not a known scorer tag.
        """
        unknown = [scorer for scorer in scorers if scorer not in self.scorers_tags]
        if unknown:
            raise ValueError(
                f"Unknown scorers {unknown}; available scorers: {self.scorers_tags}"
            )

        images, masks = self[batch_index]
        image = images[sample_index].astype(int)

        # squeeze=False keeps ``axes`` two-dimensional even for a single row.
        fig, axes = plt.subplots(len(scorers), self.n_classes + 1, squeeze=False)
        for row, scorer in enumerate(scorers):
            # The mask tensor is ordered like ``scorers_tags``, not like the
            # caller-supplied ``scorers`` list.
            scorer_idx = self.scorers_tags.index(scorer)

            image_ax = axes[row][0]
            image_ax.imshow(image)
            image_ax.axis("off")
            image_ax.set_title(f"Image (ann {scorer})")

            for class_num in range(self.n_classes):
                class_ax = axes[row][class_num + 1]
                class_ax.imshow(masks[sample_index, scorer_idx, class_num])
                class_ax.axis("off")
                class_ax.set_title(f"Class {class_num}")

        plt.show()
        return fig

    def __data_generation(self, batch_filenames):
        """Load images and per-scorer binary class masks for the given files.

        Arrays are sized to ``len(batch_filenames)`` so that a short final
        batch does not carry uninitialised rows.
        """
        n_samples = len(batch_filenames)
        images = np.empty((n_samples, *self.image_size, 3))
        # Zero-initialised: scorers without a mask keep all-zero planes.
        masks = np.zeros(
            (
                n_samples,
                self.n_scorers,
                self.n_classes,
                *self.image_size,
            )
        )
        # Shaped (n_classes, 1, 1) so comparing against an (H, W) mask
        # broadcasts to one boolean plane per class.
        class_ids = np.arange(self.n_classes).reshape(-1, 1, 1)

        for sample, filename in enumerate(batch_filenames):
            img_path = os.path.join(self.image_dir, filename)
            images[sample] = img_to_array(
                load_img(img_path, target_size=self.image_size)
            )

            for scorer, scorer_dir in enumerate(self.scorers_tags):
                mask_path = os.path.join(self.mask_dir, scorer_dir, filename)
                if not os.path.exists(mask_path):
                    LOGGER.warning(
                        (
                            "Mask not found for scorer %s and image %s. "
                            "Filling up with zeros."
                        ),
                        scorer_dir,
                        filename,
                    )
                    continue

                mask_raw = load_img(
                    mask_path,
                    color_mode="grayscale",
                    target_size=self.image_size,
                )
                mask = img_to_array(mask_raw).reshape(*self.image_size)
                masks[sample, scorer] = mask == class_ids

        return images, masks


if __name__ == "__main__":
    # Manual smoke test: build a generator over the validation split and
    # display one sample together with the masks of a few annotators.
    val_gen = ImageDataGenerator(
        image_dir="/home/brandon/unal/maestria/datasets/Histology Data/patches/Val",
        mask_dir="/home/brandon/unal/maestria/datasets/Histology Data/masks/Val",
        batch_size=16,
        n_classes=6,
    )
    # Labels say "Val" because the generator reads the Val directories.
    print(f"Val len: {len(val_gen)}")
    print(f"Val masks scorers: {val_gen.n_scorers}")
    print(f"Val masks scorers tags: {val_gen.scorers_tags}")
    val_gen.visualize_sample(
        batch_index=8,
        sample_index=8,
        scorers=["NP8", "NP16", "NP21", "expert"],
    )
8 changes: 0 additions & 8 deletions core/seg_tgce/data/crowd_seg/map.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,3 @@ def find_annotators_alias(data_target: DataTarget, base_dir_path: Path) -> list[
path.name for path in data_target_path.iterdir() if path.is_dir()
]
return annotators_alias


def produce_tf_dataset(
    data_target: DataTarget, base_dir_path: Path, annotator_alias: str
) -> Path:
    """Return the mask folder of one annotator for a data target.

    The path is built as
    ``base_dir_path / MASKS_SUB_DIR / data_target.value / annotator_alias``;
    nothing is read from or written to disk.
    """
    return base_dir_path / MASKS_SUB_DIR / data_target.value / annotator_alias
33 changes: 20 additions & 13 deletions core/seg_tgce/data/crowd_seg/visualizer.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import os
from typing import List, Tuple
from typing import Tuple

import matplotlib.pyplot as plt
import cv2
from cv2 import imread # pylint: disable=no-name-in-module


class BaseDirectoryNotFoundError(Exception):
pass


def visualize_data(
def visualize_data( # pylint: disable=too-many-locals
x_ini_values: Tuple[int, ...],
y_ini_values: Tuple[int, ...],
labelers: Tuple[str, str],
Expand All @@ -30,14 +31,20 @@ def visualize_data(
img_path = (
f"{base_path}/patches/Train/core_A0AL_AN_x_ini_{x_ini}_y_ini_{y_ini}.png"
)
non_expert_mask_path = f"{base_path}/masks/Train/{labelers[0]}/core_A0AL_AN_x_ini_{x_ini}_y_ini_{y_ini}.png"
expert_mask_path = f"{base_path}/masks/Train/{labelers[1]}/core_A0AL_AN_x_ini_{x_ini}_y_ini_{y_ini}.png"
non_expert_mask_path = (
f"{base_path}/masks/Train/{labelers[0]}/"
f"core_A0AL_AN_x_ini_{x_ini}_y_ini_{y_ini}.png"
)
expert_mask_path = (
f"{base_path}/masks/Train/{labelers[1]}/"
f"core_A0AL_AN_x_ini_{x_ini}_y_ini_{y_ini}.png"
)

im = cv2.imread(img_path)
non_expert_mask = cv2.imread(non_expert_mask_path, -1)
expert_mask = cv2.imread(expert_mask_path, -1)
img = imread(img_path)
non_expert_mask = imread(non_expert_mask_path, -1)
expert_mask = imread(expert_mask_path, -1)

axes[i, 0].imshow(im)
axes[i, 0].imshow(img)
axes[i, 0].axis("off")
axes[i, 1].imshow(non_expert_mask, cmap="Pastel1")
axes[i, 1].axis("off")
Expand All @@ -56,12 +63,12 @@ def visualize_data(
x_ini_values = 1074, 1432, 2148
y_ini_values = 1074, 1432, 2148
labelers = "NP1", "expert"
base_path = "../../../datasets/Histology Data"
save_path = "../docs/source/resources/crowd-seg-example-instances.png"
BASE_PATH = "../../../datasets/Histology Data"
SAVE_PATH = "../docs/source/resources/crowd-seg-example-instances.png"
visualize_data(
x_ini_values=x_ini_values,
y_ini_values=y_ini_values,
labelers=labelers,
base_path=base_path,
save_path=save_path,
base_path=BASE_PATH,
save_path=SAVE_PATH,
)
Empty file added core/seg_tgce/py.typed
Empty file.

0 comments on commit 726227f

Please sign in to comment.