Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

all\feat: #68 added crowd_seg metadata #90

Merged
merged 1 commit into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,816 changes: 893 additions & 923 deletions core/poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@ repository = "https://github.com/blotero/seg_tgce"


[tool.poetry.dependencies]
python = ">=3.10,<3.12"
python = ">=3.11,<3.12"
numpy = "^1.26.4"
keras = "2.15.0"
tensorflow = "2.15.1"
matplotlib = "^3.8.4"
opencv-python = "^4.9.0.80"
tensorflow-datasets = "^4.9.4"
tensorflow-datasets = "4.9.6"
gdown = "4.6.3"
boto3 = "^1.34.130"

Expand Down
46 changes: 41 additions & 5 deletions core/seg_tgce/data/crowd_seg/generator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
import logging
import os
from enum import Enum
from pathlib import Path
from typing import List, Optional, Tuple, TypedDict

import numpy as np
Expand All @@ -11,6 +13,8 @@
from tensorflow import argmax as tf_argmax
from tensorflow import reshape, transpose

from seg_tgce.data.crowd_seg.types import InvertedMetadataRecord

from .__retrieve import fetch_data, get_masks_dir, get_patches_dir
from .stage import Stage

Expand All @@ -26,6 +30,7 @@
5: "Necrosis",
}
DEFAULT_IMG_SIZE = (512, 512)
METADATA_PATH = Path(__file__).resolve().parent / "metadata"


class ScorerNotFoundError(Exception):
Expand Down Expand Up @@ -66,6 +71,7 @@ class ImageDataGenerator(Sequence): # pylint: disable=too-many-instance-attribu

def __init__( # pylint: disable=too-many-arguments
self,
*,
image_size: Tuple[int, int] = DEFAULT_IMG_SIZE,
batch_size: int = 32,
shuffle: bool = False,
Expand All @@ -89,6 +95,11 @@ def __init__( # pylint: disable=too-many-arguments
self.scorers_tags = sorted(os.listdir(mask_dir))
self.on_epoch_end()
self.schema = schema
self.scorers_db = {
filename: {scorer: False for scorer in self.scorers_tags}
for filename in self.image_filenames
}
self.stage = stage

@property
def classes_definition(self) -> dict[int, str]:
Expand Down Expand Up @@ -211,11 +222,6 @@ def __data_generation(self, batch_filenames: List[str]) -> Tuple[Tensor, Tensor]
mask == class_num, 1, 0
).reshape(*self.image_size)
else:
LOGGER.debug(
"Mask not found for scorer %s and image %s",
scorer_dir,
filename,
)
masks[batch, scorer, 0] = np.ones(self.image_size)
masks[batch, scorer, 1:] = np.zeros(
(self.n_classes - 1, *self.image_size)
Expand All @@ -227,3 +233,33 @@ def __data_generation(self, batch_filenames: List[str]) -> Tuple[Tensor, Tensor]
images[batch] = image

return images, transpose(masks, perm=[0, 3, 4, 2, 1])

def populate_metadata(self) -> None:
    """Flag, per image and scorer, whether a mask file exists on disk.

    For every name in ``self.image_filenames`` and every tag in
    ``self.scorers_tags`` the path ``<mask_dir>/<scorer>/<filename>`` is
    probed; when it exists, ``self.scorers_db[filename][scorer]`` is set to
    ``True``. Entries whose mask is missing are left untouched.
    """
    for image_name in self.image_filenames:
        for tag in self.scorers_tags:
            candidate = os.path.join(self.mask_dir, tag, image_name)
            if not os.path.exists(candidate):
                continue
            self.scorers_db[image_name][tag] = True

def store_metadata(self) -> None:
    """Persist the scorers database as two JSON files under ``METADATA_PATH``.

    Both file names are prefixed with the lower-cased name of ``self.stage``:

    * ``<stage>_data.json`` maps every filename in ``self.scorers_db`` to
      the list of scorers whose flag is truthy for that file.
    * ``<stage>_inverted.json`` maps every scorer in ``self.scorers_tags``
      to ``{"total": <count>, "scored": [<filenames>]}``.

    The metadata directory is created when it does not exist yet, so the
    method no longer fails with ``FileNotFoundError`` on a fresh checkout.
    """
    LOGGER.info("Storing scorers database...")
    stage_prefix = self.stage.name.lower()

    # Opening a file for writing does not create missing parent directories.
    METADATA_PATH.mkdir(parents=True, exist_ok=True)
    data_path = METADATA_PATH / f"{stage_prefix}_data.json"
    inverted_path = METADATA_PATH / f"{stage_prefix}_inverted.json"

    # Forward view: filename -> scorers that have a mask for it.
    projected_data = {
        filename: [scorer for scorer, has_mask in file_data.items() if has_mask]
        for filename, file_data in self.scorers_db.items()
    }

    # Inverted view: scorer -> how many and which files it scored. Every
    # known scorer gets an entry, even when it scored nothing.
    inverted_data: dict[str, InvertedMetadataRecord] = {
        scorer: {"total": 0, "scored": []} for scorer in self.scorers_tags
    }
    for img_path, scorers in projected_data.items():
        for scorer in scorers:
            record = inverted_data[scorer]
            record["total"] += 1
            record["scored"].append(img_path)

    outputs = (
        (data_path, projected_data),
        (inverted_path, inverted_data),
    )
    for json_path, payload in outputs:
        # newline="" keeps "\n" line endings regardless of platform.
        with open(json_path, "w", newline="", encoding="utf-8") as json_file:
            json.dump(payload, json_file, indent=4)
Loading
Loading