From cae51e9a3f08988014bd480a87b221a07bb808dd Mon Sep 17 00:00:00 2001 From: William Moore Date: Tue, 16 May 2023 16:48:59 +0100 Subject: [PATCH 1/9] Add check_pixels.py script --- scripts/check_pixels.py | 146 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 scripts/check_pixels.py diff --git a/scripts/check_pixels.py b/scripts/check_pixels.py new file mode 100644 index 00000000..39325268 --- /dev/null +++ b/scripts/check_pixels.py @@ -0,0 +1,146 @@ + +# Check that the pixel values for images on logged-in server exactly +# match the values for same images (same IDs) on IDR server. +# Used to validate that updating IDR (e.g. swap Fileset for NGFF or +# different Bio-Formats reader etc) doesn't change pixel values + +import argparse +import sys +import numpy as np +from datetime import datetime + +import omero +from omero.cli import cli_login +from omero.gateway import BlitzGateway + + +def log(message, log_file): + with open(log_file, 'a') as f: + f.write(message) + f.write("\n") + + +def check_image(idr_conn, image, log_file, max_planes): + + try: + sizeZ = image.getSizeZ() + sizeC = image.getSizeC() + sizeT = image.getSizeT() + zctList = [] + for t in range(sizeT): + for z in range(sizeZ): + for c in range(sizeC): + if len(zctList) < max_planes or max_planes == 0: + zctList.append( (z,c,t) ) + + idr_image = idr_conn.getObject("Image", image.id) + if idr_image is None: + log("Error: Image not found on IDR: %s" % image.id, log_file) + + planes = image.getPrimaryPixels().getPlanes(zctList) + idr_planes = idr_image.getPrimaryPixels().getPlanes(zctList) + + for plane, idr_plane, idx in zip(planes, idr_planes, zctList): + if not np.array_equiv(plane, idr_plane): + log("Error: Mismatch for Image: %s at plane (z, c, t): %s" % (image.id, idx), + log_file) + except Exception as ex: + log("Error: Image:%s %s" % (image.id, ex.message), log_file) + + +def get_object(conn, obj_string): + for dtype in ["Screen", "Plate", "Project", "Dataset", "Image"]: + if obj_string.startswith(dtype): + obj_id = int(obj_string.replace(dtype + ":", "")) + obj = conn.getObject(dtype, obj_id) + if obj is None: + print(obj_string, "not found!") + return obj + + +def get_plate_images(plate): + images = [] + for well in plate.listChildren(): + for ws in well.listChildren(): + images.append(ws.getImage()) + return images + + +def get_images(conn, obj_string): + obj = get_object(conn, obj_string) + + images = [] + if obj_string.startswith("Screen"): + for plate in obj.listChildren(): + images.extend(get_plate_images(plate)) + + elif obj_string.startswith("Plate"): + images.extend(get_plate_images(obj)) + + elif obj_string.startswith("Project"): + for dataset in obj.listChildren(): + images.extend(list(dataset.listChildren())) + + elif obj_string.startswith("Dataset"): + images.extend(list(obj.listChildren())) + + elif obj_string.startswith("Image"): + images.append(obj) + + images.sort(key=lambda x:x.id) + return images + + +def main(argv): + """ + Swaps Fileset from 'Old Object' to 'New Object'. + For all the Images in the 'Old Object' (Screen, Plate, Image or Fileset), we swap the + Fileset to use the Fileset in the 'New Object'. Images in the `New Object` are left + unlinked to any Fileset, and can then be deleted. + Also prints an sql command(s) to update the pixels in the NEW Images only. + For Screens containing multiple Plates (Filesets), we match the Plates by Name + """ + parser = argparse.ArgumentParser() + parser.add_argument('object', help='Object:ID where Object is Screen, Plate, Project, Dataset, Image') + parser.add_argument('logfile', help='File path to output log') + parser.add_argument('--max-planes', type=int, default=0, + help='Max number of planes to check per image. Default is to check ALL') + args = parser.parse_args(argv) + + max_planes = args.max_planes + obj_string = args.object + start_time = datetime.now() + log("Start: %s" % start_time, args.logfile) + log("Checking %s" % obj_string, args.logfile) + log('max_planes: %s' % max_planes, args.logfile) + + with cli_login() as cli: + conn = BlitzGateway(client_obj=cli._client) + assert ":" in obj_string + + # Create connection to IDR server + # NB: conn.connect() not working on IDR. Do it like this + idr_client = omero.client(host="idr.openmicroscopy.org", port=4064) + idr_client.createSession('public', 'public') + idr_conn = BlitzGateway(client_obj=idr_client) + + images = get_images(conn, obj_string) + idr_images = get_images(idr_conn, obj_string) + + # Check all images in IDR are also in local server + img_ids = [img.id for img in images] + idr_ids = [img.id for img in idr_images] + if not img_ids == idr_ids: + log("Error: Different Image IDs: %s" % list(set(idr_ids) - set(img_ids)), args.logfile) + + # Compare pixel values... + total = len(images) + for count, image in enumerate(images): + log("%s/%s Check Image:%s %s" % (count, total, image.id, image.name), args.logfile) + check_image(idr_conn, image, args.logfile, max_planes) + + log("End: %s" % datetime.now(), args.logfile) + +if __name__ == '__main__': + main(sys.argv[1:]) + From b77c1f608788c7fa091195216064e9859c9e7b37 Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 10 Nov 2023 17:16:35 +0000 Subject: [PATCH 2/9] Add --max-images argument. Remove logfile --- scripts/check_pixels.py | 51 ++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/scripts/check_pixels.py b/scripts/check_pixels.py index 39325268..f8c5d640 100644 --- a/scripts/check_pixels.py +++ b/scripts/check_pixels.py @@ -14,38 +14,36 @@ from omero.gateway import BlitzGateway -def log(message, log_file): - with open(log_file, 'a') as f: - f.write(message) - f.write("\n") +def log(message): + print(message) -def check_image(idr_conn, image, log_file, max_planes): +def check_image(idr_conn, image, max_planes): try: sizeZ = image.getSizeZ() sizeC = image.getSizeC() sizeT = image.getSizeT() zctList = [] - for t in range(sizeT): + for c in range(sizeC): for z in range(sizeZ): - for c in range(sizeC): + for t in range(sizeT): if len(zctList) < max_planes or max_planes == 0: zctList.append( (z,c,t) ) idr_image = idr_conn.getObject("Image", image.id) if idr_image is None: - log("Error: Image not found on IDR: %s" % image.id, log_file) + log("Error: Image not found on IDR: %s" % image.id) + return planes = image.getPrimaryPixels().getPlanes(zctList) idr_planes = idr_image.getPrimaryPixels().getPlanes(zctList) for plane, idr_plane, idx in zip(planes, idr_planes, zctList): if not np.array_equiv(plane, idr_plane): - log("Error: Mismatch for Image: %s at plane (z, c, t): %s" % (image.id, idx), - log_file) + log("Error: Mismatch for Image: %s at plane (z, c, t): %s" % (image.id, idx)) except Exception as ex: - log("Error: Image:%s %s" % (image.id, ex.message), log_file) + log("Error: Image:%s %s" % (image.id, ex.message)) def get_object(conn, obj_string): @@ -102,17 +100,21 @@ def main(argv): """ parser = argparse.ArgumentParser() parser.add_argument('object', help='Object:ID where Object is Screen, Plate, Project, Dataset, Image') - parser.add_argument('logfile', help='File path to output log') + # parser.add_argument('logfile', help='File path to output log') + parser.add_argument('--max-images', type=int, default=0, + help='Max number of images to check. Default is to check ALL') parser.add_argument('--max-planes', type=int, default=0, help='Max number of planes to check per image. Default is to check ALL') args = parser.parse_args(argv) + max_images = args.max_images max_planes = args.max_planes obj_string = args.object start_time = datetime.now() - log("Start: %s" % start_time, args.logfile) - log("Checking %s" % obj_string, args.logfile) - log('max_planes: %s' % max_planes, args.logfile) + log("Start: %s" % start_time) + log("Checking %s" % obj_string) + log('max_planes: %s' % max_planes) + log('max_images: %s' % max_images) with cli_login() as cli: conn = BlitzGateway(client_obj=cli._client) @@ -125,21 +127,24 @@ def main(argv): idr_conn = BlitzGateway(client_obj=idr_client) images = get_images(conn, obj_string) - idr_images = get_images(idr_conn, obj_string) + # idr_images = get_images(idr_conn, obj_string) # Check all images in IDR are also in local server - img_ids = [img.id for img in images] - idr_ids = [img.id for img in idr_images] - if not img_ids == idr_ids: - log("Error: Different Image IDs: %s" % list(set(idr_ids) - set(img_ids)), args.logfile) + # img_ids = [img.id for img in images] + # idr_ids = [img.id for img in idr_images] + # if not img_ids == idr_ids: + # log("Error: Different Image IDs: %s" % list(set(idr_ids) - set(img_ids))) # Compare pixel values... total = len(images) for count, image in enumerate(images): - log("%s/%s Check Image:%s %s" % (count, total, image.id, image.name), args.logfile) - check_image(idr_conn, image, args.logfile, max_planes) + if count > max_images: + log("Checked max images... Done") + break + log("%s/%s Check Image:%s %s" % (count, total, image.id, image.name)) + check_image(idr_conn, image, max_planes) - log("End: %s" % datetime.now(), args.logfile) + log("End: %s" % datetime.now()) if __name__ == '__main__': main(sys.argv[1:]) From 66867d1e65cc89ef0d459c33316d71f748159474 Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 10 Nov 2023 17:29:33 +0000 Subject: [PATCH 3/9] max-images is per Plate/Fileset --- scripts/check_pixels.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/scripts/check_pixels.py b/scripts/check_pixels.py index f8c5d640..13521855 100644 --- a/scripts/check_pixels.py +++ b/scripts/check_pixels.py @@ -56,24 +56,26 @@ def get_object(conn, obj_string): return obj -def get_plate_images(plate): +def get_plate_images(plate, max_images): images = [] for well in plate.listChildren(): for ws in well.listChildren(): images.append(ws.getImage()) + if max_images and len(images) >= max_images: + return images return images -def get_images(conn, obj_string): +def get_images(conn, obj_string, max_images=0): obj = get_object(conn, obj_string) images = [] if obj_string.startswith("Screen"): for plate in obj.listChildren(): - images.extend(get_plate_images(plate)) + images.extend(get_plate_images(plate, max_images)) elif obj_string.startswith("Plate"): - images.extend(get_plate_images(obj)) + images.extend(get_plate_images(obj, max_images)) elif obj_string.startswith("Project"): for dataset in obj.listChildren(): @@ -102,7 +104,7 @@ def main(argv): parser.add_argument('object', help='Object:ID where Object is Screen, Plate, Project, Dataset, Image') # parser.add_argument('logfile', help='File path to output log') parser.add_argument('--max-images', type=int, default=0, - help='Max number of images to check. Default is to check ALL') + help='Max number of images per FILESET. Default is to check ALL') parser.add_argument('--max-planes', type=int, default=0, help='Max number of planes to check per image. Default is to check ALL') args = parser.parse_args(argv) @@ -126,21 +128,18 @@ def main(argv): idr_client.createSession('public', 'public') idr_conn = BlitzGateway(client_obj=idr_client) - images = get_images(conn, obj_string) - # idr_images = get_images(idr_conn, obj_string) + images = get_images(conn, obj_string, max_images) + idr_images = get_images(idr_conn, obj_string) # Check all images in IDR are also in local server - # img_ids = [img.id for img in images] - # idr_ids = [img.id for img in idr_images] - # if not img_ids == idr_ids: - # log("Error: Different Image IDs: %s" % list(set(idr_ids) - set(img_ids))) + img_ids = [img.id for img in images] + idr_ids = [img.id for img in idr_images] + if not img_ids == idr_ids: + log("Error: Different Image IDs: %s" % list(set(idr_ids) - set(img_ids))) # Compare pixel values... total = len(images) for count, image in enumerate(images): - if count > max_images: - log("Checked max images... Done") - break log("%s/%s Check Image:%s %s" % (count, total, image.id, image.name)) check_image(idr_conn, image, max_planes) From a141fb62ff37a94ed84f3deb8cd260e31998aecf Mon Sep 17 00:00:00 2001 From: William Moore Date: Wed, 22 Nov 2023 11:52:19 +0000 Subject: [PATCH 4/9] Don't assume exception has message --- scripts/check_pixels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/check_pixels.py b/scripts/check_pixels.py index 13521855..50dfa542 100644 --- a/scripts/check_pixels.py +++ b/scripts/check_pixels.py @@ -43,7 +43,7 @@ def check_image(idr_conn, image, max_planes): if not np.array_equiv(plane, idr_plane): log("Error: Mismatch for Image: %s at plane (z, c, t): %s" % (image.id, idx)) except Exception as ex: - log("Error: Image:%s %s" % (image.id, ex.message)) + log("Error: Image:%s %s" % (image.id, ex)) def get_object(conn, obj_string): From 2fbe5f58bf5b99b9275ecfa6dddd09d09172c56b Mon Sep 17 00:00:00 2001 From: William Moore Date: Wed, 22 Nov 2023 11:53:30 +0000 Subject: [PATCH 5/9] Use max_images on IDR data to match test data --- scripts/check_pixels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/check_pixels.py b/scripts/check_pixels.py index 50dfa542..d18965fe 100644 --- a/scripts/check_pixels.py +++ b/scripts/check_pixels.py @@ -129,7 +129,7 @@ def main(argv): idr_conn = BlitzGateway(client_obj=idr_client) images = get_images(conn, obj_string, max_images) - idr_images = get_images(idr_conn, obj_string) + idr_images = get_images(idr_conn, obj_string, max_images) # Check all images in IDR are also in local server img_ids = [img.id for img in images] From 250f35c8d8a852b00fd2e855b19470ccfeffd5d0 Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 27 Nov 2023 12:02:39 +0000 Subject: [PATCH 6/9] Add support for --max-planes=sizeC --- scripts/check_pixels.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/scripts/check_pixels.py b/scripts/check_pixels.py index d18965fe..b8e2a724 100644 --- a/scripts/check_pixels.py +++ b/scripts/check_pixels.py @@ -24,11 +24,19 @@ def check_image(idr_conn, image, max_planes): sizeZ = image.getSizeZ() sizeC = image.getSizeC() sizeT = image.getSizeT() + if max_planes is not None: + # 'sizeC' or number + if max_planes == "sizeC": + max_planes = sizeC + else: + max_planes = int(max_planes) + zctList = [] - for c in range(sizeC): - for z in range(sizeZ): - for t in range(sizeT): - if len(zctList) < max_planes or max_planes == 0: + # if max_planes == sizeC, we'll get 1 plane from each Channel + for z in range(sizeZ): + for t in range(sizeT): + for c in range(sizeC): + if max_planes is None or len(zctList) < max_planes: zctList.append( (z,c,t) ) idr_image = idr_conn.getObject("Image", image.id) @@ -105,8 +113,8 @@ def main(argv): # parser.add_argument('logfile', help='File path to output log') parser.add_argument('--max-images', type=int, default=0, help='Max number of images per FILESET. Default is to check ALL') - parser.add_argument('--max-planes', type=int, default=0, - help='Max number of planes to check per image. Default is to check ALL') + parser.add_argument('--max-planes', + help='Max number of planes to check per image or sizeC to check 1 from each Channel. Default is to check ALL') args = parser.parse_args(argv) max_images = args.max_images From dfb9de81e1634ab8caf01341e48329bbbaf350eb Mon Sep 17 00:00:00 2001 From: William Moore Date: Tue, 12 Dec 2023 10:06:36 +0000 Subject: [PATCH 7/9] Include image.name in Error logs --- scripts/check_pixels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/check_pixels.py b/scripts/check_pixels.py index b8e2a724..3715acd5 100644 --- a/scripts/check_pixels.py +++ b/scripts/check_pixels.py @@ -49,9 +49,9 @@ def check_image(idr_conn, image, max_planes): for plane, idr_plane, idx in zip(planes, idr_planes, zctList): if not np.array_equiv(plane, idr_plane): - log("Error: Mismatch for Image: %s at plane (z, c, t): %s" % (image.id, idx)) + log("Error: Mismatch for Image:%s %s at plane (z, c, t): %s" % (image.id, image.name, idx)) except Exception as ex: - log("Error: Image:%s %s" % (image.id, ex)) + log("Error: Image:%s %s %s" % (image.id, image.name, ex)) def get_object(conn, obj_string): From 137774a793f113790318d485ce45009a08538615 Mon Sep 17 00:00:00 2001 From: William Moore Date: Thu, 18 Jan 2024 13:11:59 +0000 Subject: [PATCH 8/9] Add --timing option to check times for getPlanes() --- scripts/check_pixels.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/scripts/check_pixels.py b/scripts/check_pixels.py index 3715acd5..6403caa0 100644 --- a/scripts/check_pixels.py +++ b/scripts/check_pixels.py @@ -18,7 +18,7 @@ def log(message): print(message) -def check_image(idr_conn, image, max_planes): +def check_image(idr_conn, image, max_planes, check_timing=False): try: sizeZ = image.getSizeZ() @@ -44,8 +44,18 @@ def check_image(idr_conn, image, max_planes): log("Error: Image not found on IDR: %s" % image.id) return - planes = image.getPrimaryPixels().getPlanes(zctList) - idr_planes = idr_image.getPrimaryPixels().getPlanes(zctList) + start_local = datetime.now() + planes = list(image.getPrimaryPixels().getPlanes(zctList)) + planes_time_local = datetime.now() - start_local + + start_idr = datetime.now() + idr_planes = list(idr_image.getPrimaryPixels().getPlanes(zctList)) + planes_time_idr = datetime.now() - start_idr + + if check_timing: + ratio = planes_time_local.microseconds / planes_time_idr.microseconds + log("Ratio of local/IDR timing for %s planes is %s Image: %s" % (len(zctList), ratio, image.id)) + log("Local took %s, IDR took %s" % (planes_time_local, planes_time_idr)) for plane, idr_plane, idx in zip(planes, idr_planes, zctList): if not np.array_equiv(plane, idr_plane): @@ -115,6 +125,7 @@ def main(argv): help='Max number of images per FILESET. Default is to check ALL') parser.add_argument('--max-planes', help='Max number of planes to check per image or sizeC to check 1 from each Channel. Default is to check ALL') + parser.add_argument('--timing', action="store_true", help="print timing difference between local and IDR") args = parser.parse_args(argv) max_images = args.max_images @@ -125,6 +136,7 @@ def main(argv): log("Checking %s" % obj_string) log('max_planes: %s' % max_planes) log('max_images: %s' % max_images) + log('check timing: %s' % args.timing) with cli_login() as cli: conn = BlitzGateway(client_obj=cli._client) @@ -149,7 +161,7 @@ def main(argv): total = len(images) for count, image in enumerate(images): log("%s/%s Check Image:%s %s" % (count, total, image.id, image.name)) - check_image(idr_conn, image, max_planes) + check_image(idr_conn, image, max_planes, check_timing=args.timing) log("End: %s" % datetime.now()) From 1f4c0bacfdf433f2d074c4079f66841ccca3149f Mon Sep 17 00:00:00 2001 From: William Moore Date: Thu, 18 Jan 2024 13:34:04 +0000 Subject: [PATCH 9/9] Fix ratio of local/IDR timing --- scripts/check_pixels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/check_pixels.py b/scripts/check_pixels.py index 6403caa0..28581e3e 100644 --- a/scripts/check_pixels.py +++ b/scripts/check_pixels.py @@ -53,7 +53,7 @@ def check_image(idr_conn, image, max_planes, check_timing=False): planes_time_idr = datetime.now() - start_idr if check_timing: - ratio = planes_time_local.microseconds / planes_time_idr.microseconds + ratio = planes_time_local / planes_time_idr log("Ratio of local/IDR timing for %s planes is %s Image: %s" % (len(zctList), ratio, image.id)) log("Local took %s, IDR took %s" % (planes_time_local, planes_time_idr))