Commit 4adcd61

all\refac: #68 upd docs
- update docs according to latest api usage
- minor change in default target img size
blotero committed Jun 11, 2024
1 parent b3738ee commit 4adcd61
Showing 9 changed files with 998 additions and 48 deletions.
6 changes: 4 additions & 2 deletions core/seg_tgce/data/crowd_seg/__init__.py
@@ -3,9 +3,11 @@
from .generator import ImageDataGenerator
from .stage import Stage

DEFAULT_TARGET_SIZE = (512, 512)


def get_all_data(
image_size: Tuple[int, int] = (256, 256),
image_size: Tuple[int, int] = DEFAULT_TARGET_SIZE,
batch_size: int = 32,
shuffle: bool = False,
) -> Tuple[ImageDataGenerator, ...]:
@@ -26,7 +28,7 @@ def get_all_data(

def get_stage_data(
stage: Stage,
image_size: Tuple[int, int] = (256, 256),
image_size: Tuple[int, int] = DEFAULT_TARGET_SIZE,
batch_size: int = 32,
shuffle: bool = False,
) -> ImageDataGenerator:
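The practical effect: both loaders now default to 512x512 targets. A
minimal sketch of the new call sites (assuming only the signatures shown
above; the second call reproduces the old 256x256 default):

.. code:: python
from seg_tgce.data.crowd_seg import get_all_data
# Uses the new DEFAULT_TARGET_SIZE of (512, 512).
train, val, test = get_all_data(batch_size=8)
# The previous 256x256 behavior must now be requested explicitly.
train, val, test = get_all_data(image_size=(256, 256), batch_size=8)
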
10 changes: 7 additions & 3 deletions core/seg_tgce/data/crowd_seg/__main__.py
@@ -4,9 +4,13 @@
def main() -> None:
train, val, test = get_all_data(batch_size=8)
val.visualize_sample(batch_index=138, sample_indexes=[2, 3, 4, 5])
print(f"Train: {len(train)}")
print(f"Val: {len(val)}")
print(f"Test: {len(test)}")
print(f"Train: {len(train)} batches")
print(f"Val: {len(val)} batches")
print(f"Test: {len(test)} batches")

for i in range(1):
img, mask = train[i]
print(f"Batch {i}: {img.shape}, {mask.shape}")


main()
3 changes: 2 additions & 1 deletion core/seg_tgce/data/crowd_seg/generator.py
@@ -24,6 +24,7 @@
4: "Benign Inflammation",
5: "Necrosis",
}
DEFAULT_IMG_SIZE = (512, 512)


class ScorerNotFoundError(Exception):
@@ -57,7 +58,7 @@ class ImageDataGenerator(Sequence):

def __init__( # pylint: disable=too-many-arguments
self,
image_size: Tuple[int, int] = (256, 256),
image_size: Tuple[int, int] = DEFAULT_IMG_SIZE,
batch_size: int = 32,
shuffle: bool = False,
stage: Stage = Stage.TRAIN,
16 changes: 9 additions & 7 deletions core/seg_tgce/loss/tgce.py
@@ -55,13 +55,15 @@ def __init__(
super().__init__(name=name)

def call(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
"""
Calls loss function itself.
"""
y_true = cast(y_true, TARGET_DATA_TYPE)
y_pred = cast(y_pred, TARGET_DATA_TYPE)

y_pred = y_pred[..., : self.num_classes + self.num_annotators] # type:ignore
y_true = tf.reshape(
y_true, (y_true.shape[:-1]) + (self.num_classes, self.num_annotators)
)
lambda_r = y_pred[..., self.num_classes :] # type:ignore
y_pred_ = y_pred[..., : self.num_classes] # type:ignore
y_pred_ = y_pred[..., : self.num_classes]
n_samples, width, height, _ = y_pred_.shape
y_pred_ = y_pred_[..., tf.newaxis] # type:ignore
y_pred_ = tf.repeat(y_pred_, repeats=[self.num_annotators], axis=-1)
@@ -88,6 +90,7 @@ def call(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
),
axis=-2,
)

term_c = tf.math.multiply(
tf.ones([n_samples, width, height, self.num_annotators]) - lambda_r,
(
@@ -102,9 +105,8 @@ def call(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
)

loss = tf.math.reduce_mean(tf.math.multiply(lambda_r, term_r) + term_c)
loss = tf.where(tf.math.is_nan(loss), tf.constant(0.0), loss)
entropy_term = binary_entropy(y_true, y_pred_)
loss = tf.math.add(loss, self.gamma * entropy_term)
loss = tf.where(tf.math.is_nan(loss), tf.constant(1e-8), loss)

return loss

def get_config(
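For orientation, the reshaped ``y_true`` assumes the flat label axis
factors into classes times annotators. A minimal sketch of that shape
bookkeeping (6 classes and 23 annotators are assumptions borrowed from
the docs example further down):

.. code:: python
import tensorflow as tf
num_classes, num_annotators = 6, 23
# Flattened labels: (batch, height, width, classes * annotators).
y_true = tf.zeros((8, 512, 512, num_classes * num_annotators))
# Same reshape as in the loss: split the last axis in two.
y_true = tf.reshape(y_true, y_true.shape[:-1] + (num_classes, num_annotators))
print(y_true.shape)  # (8, 512, 512, 6, 23)
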
2 changes: 1 addition & 1 deletion core/seg_tgce/models/unet.py
@@ -25,7 +25,7 @@ def kernel_initializer(seed: float) -> GlorotUniform:


def unet_tgce( # pylint: disable=too-many-statements
input_shape: Tuple[int, int, int] = (128, 128, 3),
input_shape: Tuple[int, int, int],
name: str = "UNET",
out_channels: int = 2,
n_scorers: int = 5,
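Since ``input_shape`` lost its default, call sites must now pass it
explicitly. A hypothetical sketch (the shape is an assumption matching
the new 512x512 target size with RGB inputs; the remaining arguments
keep their defaults):

.. code:: python
from seg_tgce.models.unet import unet_tgce
# input_shape is now required; (512, 512, 3) is illustrative.
model = unet_tgce(input_shape=(512, 512, 3))
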
881 changes: 881 additions & 0 deletions docs/poetry.lock

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions docs/pyproject.toml
@@ -0,0 +1,20 @@
[tool.poetry]
name = "docs"
version = "0.1.0"
description = ""
authors = ["Brandon Lotero <[email protected]>"]
package-mode = false

[tool.poetry.dependencies]
python = "^3.11"
sphinx = "^7.3.7"
sphinx-rtd-theme = "^2.0.0"

[tool.poetry.group.dev.dependencies]
rstcheck = "^6.2.1"
rstfmt = "^0.0.14"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
45 changes: 24 additions & 21 deletions docs/source/contribution.rst
@@ -11,37 +11,41 @@ practices according to your contribution target.

The ``core/`` directory contains all functionalities for training and
running the semantic segmentation model. Core behaves like a python
module. For contributing to this section, first setup your development
environment as follows:
module. For contributing to this section, first make sure you have
`poetry <https://python-poetry.org/docs/>`_ installed on your system,
then set up your environment as follows:

.. code:: bash
cd core/
python -m venv .env
source .env/bin/activate
pip install -r requirements.txt
poetry install
poetry shell
When refactoring or adding new features, run tests locally with:
This will leave you with an environment containing all dependencies and
a shell session ready to go.

When contributing, run your tests locally with:

.. code:: bash
pytest .
Also, use ``pylint`` and ``mypy`` for linting code:
Also, use ``pylint`` and ``mypy`` for linting your code. CI pipelines
run these too and will fail if code quality is not 10/10:

.. code:: bash
pylint .
mypy .
pylint seg_tgce
mypy seg_tgce
Pylint should score your code 10/10 and mypy should find no issues.

Additionally, for formatting code, you can use ``isort`` and ``black``:

.. code:: bash
black .
isort .
black seg_tgce
isort --profile=black seg_tgce
******
Docs
@@ -53,17 +57,16 @@ documentation pages.
Development environment
=======================

Please setup your development environment with ``venv`` for python 3.11
as follows
Please set up your development environment with ``poetry`` for Python
3.11 as follows:

.. code:: bash
cd docs/
python -m venv .env
source .env/bin/activate
pip install -r requirements.txt
poetry install
poetry shell
Once your ``venv`` is ready, you can lint your pages after adding new
Once your environment is ready, you can lint your pages after adding new
content as follows:

.. code:: bash
@@ -79,19 +82,19 @@ Also, you can locally build doc pages with:
make html
Besides, if you want to apply formatting to your docs, you can use
``rstfmt``:
Please format your docs with ``rstfmt`` to keep them consistent with
the standard:

.. code:: bash
rstfmt -r source/
rstfmt source/
***********
Notebooks
***********

For setting up a local jupyter notebook, run the following (inside your
venv):
poetry environment):

.. code:: bash
63 changes: 50 additions & 13 deletions docs/source/experiments.rst
@@ -43,9 +43,9 @@ layer for producing scorers with different levels of agreement.
Crowd Seg Histopathological images
***********************************

Our second experiment was elaborated on the CrowdSeg dataset, which
consists of Triple Negative Breast Cancer images labeled by 20 medical
students.
Our second experiment was carried out on the `CrowdSeg
<https://github.com/wizmik12/CRowd_Seg>`_ dataset, which consists of
Triple Negative Breast Cancer images labeled by 20 medical students.

This dataset fairly represents the original intention of the project,
which is to provide a tool for pathologists to segment histopathological
@@ -61,8 +61,52 @@ in the figure:
:align: center
:alt: Different labeling instances for three different patches of the CrowdSeg dataset.

Loading the dataset
===================
Fetching and loading the dataset
================================

You can use the simple API provided by the ``seg_tgce.data.crowd_seg``
module for fetching either all available data or a single stage (train,
test, or val).

You can fetch your data simply like this:

.. code:: python
from seg_tgce.data.crowd_seg import get_all_data
train, val, test = get_all_data(batch_size=8)
for i in range(1):
img, mask = val[i]
print(f"Batch {i}: {img.shape}, {mask.shape}")
Output:

.. code:: text
Batch 0: (8, 512, 512, 3), (8, 512, 512, 6, 23)
A single stage can also be fetched and even visualized:

.. code:: python
from seg_tgce.data.crowd_seg import get_stage_data
from seg_tgce.data.crowd_seg.stage import Stage
val = get_stage_data(stage = Stage.VAL, batch_size=8)
val.visualize_sample()
When running
the ``visualize_sample`` method, the generator will load the images and
masks from the disk and display them, with a result similar to the
following:

.. image:: resources/crowd-seg-generator-visualization.png
:width: 100%
:align: center
:alt: sample from the CrowdSeg dataset with the ``ImageDataGenerator`` class.

Loading the dataset manually
============================

If you already have a downloaded dataset in a certain directory, you can
load it simply as a Keras sequence with the ``ImageDataGenerator``
@@ -88,12 +132,5 @@ class:
)
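
A minimal sketch of such an instantiation, based on the
``ImageDataGenerator`` signature shown in ``generator.py`` above
(argument values are illustrative):

.. code:: python
from seg_tgce.data.crowd_seg.generator import ImageDataGenerator
from seg_tgce.data.crowd_seg.stage import Stage
val = ImageDataGenerator(
    image_size=(512, 512),
    batch_size=8,
    shuffle=False,
    stage=Stage.VAL,
)
val.visualize_sample()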
The ``ImageDataGenerator`` class is a subclass of the Keras ``Sequence``
class, which allows us to load the dataset in a lazy way. When running
the ``visualize_sample`` method, the generator will load the images and
masks from the disk and display them., with a result similar to the
following
class, which allows us to load the dataset in a lazy way.

.. image:: resources/crowd-seg-generator-visualization.png
:width: 100%
:align: center
:alt: sample from the CrowdSeg dataset with the ``ImageDataGenerator`` class.
