Commit 4adcd61

all\refac: #68 upd docs
- update docs according to latest api usage
- minor change in default target img size
blotero committed Jun 11, 2024
1 parent b3738ee commit 4adcd61
Showing 9 changed files with 998 additions and 48 deletions.
6 changes: 4 additions & 2 deletions core/seg_tgce/data/crowd_seg/__init__.py
@@ -3,9 +3,11 @@
from .generator import ImageDataGenerator
from .stage import Stage

DEFAULT_TARGET_SIZE = (512, 512)


def get_all_data(
image_size: Tuple[int, int] = (256, 256),
image_size: Tuple[int, int] = DEFAULT_TARGET_SIZE,
batch_size: int = 32,
shuffle: bool = False,
) -> Tuple[ImageDataGenerator, ...]:
@@ -26,7 +28,7 @@ def get_all_data(

def get_stage_data(
stage: Stage,
image_size: Tuple[int, int] = (256, 256),
image_size: Tuple[int, int] = DEFAULT_TARGET_SIZE,
batch_size: int = 32,
shuffle: bool = False,
) -> ImageDataGenerator:
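The practical effect: both loaders now default to 512x512 targets. A
minimal sketch of the new call sites (assuming only the signatures shown
above; the second call reproduces the old 256x256 default):

.. code:: python
from seg_tgce.data.crowd_seg import get_all_data
# Uses the new DEFAULT_TARGET_SIZE of (512, 512).
train, val, test = get_all_data(batch_size=8)
# The previous 256x256 behavior must now be requested explicitly.
train, val, test = get_all_data(image_size=(256, 256), batch_size=8)
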
10 changes: 7 additions & 3 deletions core/seg_tgce/data/crowd_seg/__main__.py
@@ -4,9 +4,13 @@
def main() -> None:
train, val, test = get_all_data(batch_size=8)
val.visualize_sample(batch_index=138, sample_indexes=[2, 3, 4, 5])
print(f"Train: {len(train)}")
print(f"Val: {len(val)}")
print(f"Test: {len(test)}")
print(f"Train: {len(train)} batches")
print(f"Val: {len(val)} batches")
print(f"Test: {len(test)} batches")

for i in range(1):
img, mask = train[i]
print(f"Batch {i}: {img.shape}, {mask.shape}")


main()
3 changes: 2 additions & 1 deletion core/seg_tgce/data/crowd_seg/generator.py
@@ -24,6 +24,7 @@
4: "Benign Inflammation",
5: "Necrosis",
}
DEFAULT_IMG_SIZE = (512, 512)


class ScorerNotFoundError(Exception):
@@ -57,7 +58,7 @@ class ImageDataGenerator(Sequence):

def __init__( # pylint: disable=too-many-arguments
self,
image_size: Tuple[int, int] = (256, 256),
image_size: Tuple[int, int] = DEFAULT_IMG_SIZE,
batch_size: int = 32,
shuffle: bool = False,
stage: Stage = Stage.TRAIN,
16 changes: 9 additions & 7 deletions core/seg_tgce/loss/tgce.py
@@ -55,13 +55,15 @@ def __init__(
super().__init__(name=name)

def call(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
"""
Calls loss function itself.
"""
y_true = cast(y_true, TARGET_DATA_TYPE)
y_pred = cast(y_pred, TARGET_DATA_TYPE)

y_pred = y_pred[..., : self.num_classes + self.num_annotators] # type:ignore
y_true = tf.reshape(
y_true, (y_true.shape[:-1]) + (self.num_classes, self.num_annotators)
)
lambda_r = y_pred[..., self.num_classes :] # type:ignore
y_pred_ = y_pred[..., : self.num_classes] # type:ignore
y_pred_ = y_pred[..., : self.num_classes]
n_samples, width, height, _ = y_pred_.shape
y_pred_ = y_pred_[..., tf.newaxis] # type:ignore
y_pred_ = tf.repeat(y_pred_, repeats=[self.num_annotators], axis=-1)
@@ -88,6 +90,7 @@ def call(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
),
axis=-2,
)

term_c = tf.math.multiply(
tf.ones([n_samples, width, height, self.num_annotators]) - lambda_r,
(
@@ -102,9 +105,8 @@ def call(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
)

loss = tf.math.reduce_mean(tf.math.multiply(lambda_r, term_r) + term_c)
loss = tf.where(tf.math.is_nan(loss), tf.constant(0.0), loss)
entropy_term = binary_entropy(y_true, y_pred_)
loss = tf.math.add(loss, self.gamma * entropy_term)
loss = tf.where(tf.math.is_nan(loss), tf.constant(1e-8), loss)

return loss

def get_config(
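For orientation, the reshaped ``y_true`` assumes the flat label axis
factors into classes times annotators. A minimal sketch of that shape
bookkeeping (6 classes and 23 annotators are assumptions borrowed from
the docs example further down):

.. code:: python
import tensorflow as tf
num_classes, num_annotators = 6, 23
# Flattened labels: (batch, height, width, classes * annotators).
y_true = tf.zeros((8, 512, 512, num_classes * num_annotators))
# Same reshape as in the loss: split the last axis in two.
y_true = tf.reshape(y_true, y_true.shape[:-1] + (num_classes, num_annotators))
print(y_true.shape)  # (8, 512, 512, 6, 23)
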
2 changes: 1 addition & 1 deletion core/seg_tgce/models/unet.py
@@ -25,7 +25,7 @@ def kernel_initializer(seed: float) -> GlorotUniform:


def unet_tgce( # pylint: disable=too-many-statements
input_shape: Tuple[int, int, int] = (128, 128, 3),
input_shape: Tuple[int, int, int],
name: str = "UNET",
out_channels: int = 2,
n_scorers: int = 5,
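Since ``input_shape`` lost its default, call sites must now pass it
explicitly. A hypothetical sketch (the shape is an assumption matching
the new 512x512 target size with RGB inputs; the remaining arguments
keep their defaults):

.. code:: python
from seg_tgce.models.unet import unet_tgce
# input_shape is now required; (512, 512, 3) is illustrative.
model = unet_tgce(input_shape=(512, 512, 3))
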
881 changes: 881 additions & 0 deletions docs/poetry.lock

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions docs/pyproject.toml
@@ -0,0 +1,20 @@
[tool.poetry]
name = "docs"
version = "0.1.0"
description = ""
authors = ["Brandon Lotero <[email protected]>"]
package-mode = false

[tool.poetry.dependencies]
python = "^3.11"
sphinx = "^7.3.7"
sphinx-rtd-theme = "^2.0.0"

[tool.poetry.group.dev.dependencies]
rstcheck = "^6.2.1"
rstfmt = "^0.0.14"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
45 changes: 24 additions & 21 deletions docs/source/contribution.rst
@@ -11,37 +11,41 @@ practices according to your contribution target.

The ``core/`` directory contains all functionalities for training and
running the semantic segmentation model. Core behaves like a python
module. For contributing to this section, first setup your development
environment as follows:
module. For contributing to this section, first make sure you have
`poetry <https://python-poetry.org/docs/>`_ installed on your system,
then set up your environment as follows:

.. code:: bash
cd core/
python -m venv .env
source .env/bin/activate
pip install -r requirements.txt
poetry install
poetry shell
When refactoring or adding new features, run tests locally with:
This will leave you with an environment containing all dependencies and
a shell session ready to go.

When contributing, run your tests locally with:

.. code:: bash
pytest .
Also, use ``pylint`` and ``mypy`` for linting code:
Also, use ``pylint`` and ``mypy`` for linting your code. CI pipelines
run these too and will fail if code quality is not 10/10:

.. code:: bash
pylint .
mypy .
pylint seg_tgce
mypy seg_tgce
Pylint should score your code 10/10 and mypy should find no issues.

Additionally, for formatting code, you can use ``isort`` and ``black``:

.. code:: bash
black .
isort .
black seg_tgce
isort --profile=black seg_tgce
******
Docs
@@ -53,17 +57,16 @@ documentation pages.
Development environment
=======================

Please setup your development environment with ``venv`` for python 3.11
as follows
Please set up your development environment with ``poetry`` for Python
3.11 as follows:

.. code:: bash
cd docs/
python -m venv .env
source .env/bin/activate
pip install -r requirements.txt
poetry install
poetry shell
Once your ``venv`` is ready, you can lint your pages after adding new
Once your environment is ready, you can lint your pages after adding new
content as follows:

.. code:: bash
@@ -79,19 +82,19 @@ Also, you can locally build doc pages with:
make html
Besides, if you want to apply formatting to your docs, you can use
``rstfmt``:
Please format your docs with ``rstfmt`` to keep them consistent with
the standard:

.. code:: bash
rstfmt -r source/
rstfmt source/
***********
Notebooks
***********

For setting up a local jupyter notebook, run the following (inside your
venv):
poetry environment):

.. code:: bash
63 changes: 50 additions & 13 deletions docs/source/experiments.rst
@@ -43,9 +43,9 @@ layer for producing scorers with different levels of agreement.
Crowd Seg Histopathological images
***********************************

Our second experiment was elaborated on the CrowdSeg dataset, which
consists of Triple Negative Breast Cancer images labeled by 20 medical
students.
Our second experiment was carried out on the `CrowdSeg
<https://github.com/wizmik12/CRowd_Seg>`_ dataset, which consists of
Triple Negative Breast Cancer images labeled by 20 medical students.

This dataset fairly represents the original intention of the project,
which is to provide a tool for pathologists to segment histopathological
@@ -61,8 +61,52 @@ in the figure:
:align: center
:alt: Different labeling instances for three different patches of the CrowdSeg dataset.

Loading the dataset
===================
Fetching and loading the dataset
================================

You can use the simple API provided by the ``seg_tgce.data.crowd_seg``
module for fetching either all available data or a single stage (train,
test, or val).

You can fetch your data simply like this:

.. code:: python
from seg_tgce.data.crowd_seg import get_all_data
train, val, test = get_all_data(batch_size=8)
for i in range(1):
img, mask = val[i]
print(f"Batch {i}: {img.shape}, {mask.shape}")
Output:

.. code:: text
Batch 0: (8, 512, 512, 3), (8, 512, 512, 6, 23)
A single stage can also be fetched and even visualized:

.. code:: python
from seg_tgce.data.crowd_seg import get_stage_data
from seg_tgce.data.crowd_seg.stage import Stage
val = get_stage_data(stage = Stage.VAL, batch_size=8)
val.visualize_sample()
When running
the ``visualize_sample`` method, the generator will load the images and
masks from the disk and display them, with a result similar to the
following:

.. image:: resources/crowd-seg-generator-visualization.png
:width: 100%
:align: center
:alt: sample from the CrowdSeg dataset with the ``ImageDataGenerator`` class.

Loading the dataset manually
============================

If you already have a downloaded dataset in a certain directory, you can
load it simply as a Keras sequence with the ``ImageDataGenerator``
@@ -88,12 +132,5 @@ class:
)
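
A minimal sketch of such an instantiation, based on the
``ImageDataGenerator`` signature shown in ``generator.py`` above
(argument values are illustrative):

.. code:: python
from seg_tgce.data.crowd_seg.generator import ImageDataGenerator
from seg_tgce.data.crowd_seg.stage import Stage
val = ImageDataGenerator(
    image_size=(512, 512),
    batch_size=8,
    shuffle=False,
    stage=Stage.VAL,
)
val.visualize_sample()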
The ``ImageDataGenerator`` class is a subclass of the Keras ``Sequence``
class, which allows us to load the dataset in a lazy way. When running
the ``visualize_sample`` method, the generator will load the images and
masks from the disk and display them., with a result similar to the
following
class, which allows us to load the dataset in a lazy way.

.. image:: resources/crowd-seg-generator-visualization.png
:width: 100%
:align: center
:alt: sample from the CrowdSeg dataset with the ``ImageDataGenerator`` class.
