Created
January 29, 2025 09:53
-
-
Save xulman/c698291ae3ab28cab66e250018a9555d to your computer and use it in GitHub Desktop.
InstanSeg API to create, train, save, and use own models on own data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| import torch | |
| from instanseg.scripts.train import instanseg_training | |
| from instanseg.utils.model_loader import load_model as original_load_model | |
| from instanseg import inference_class as IS | |
# Run on the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(f"Will use: {device}")

# Batch size passed to the InstanSeg training routine.
batch_size = 8

# Folder used both as the training output path and as the place to load models from.
LOCAL_FOLDER_WITH_MODELS = '.'
def create_official_model():
    """
    Return the pretrained "fluorescence_nuclei_and_cells" InstanSeg model.

    The model is requested from InstanSeg by name (it is fetched rather than
    built here) and is bound to the module-level `device`.
    """
    official_model = IS.InstanSeg(
        "fluorescence_nuclei_and_cells",
        image_reader="skimage.io",
        verbosity=1,
        device=device,
    )
    return official_model
def create_official_model_forBrightField():
    """
    Return the pretrained "brightfield_nuclei" InstanSeg model.

    Same construction as create_official_model(), only the model name differs;
    the model is bound to the module-level `device`.
    """
    wrapper_options = {
        "image_reader": "skimage.io",
        "verbosity": 1,
        "device": device,
    }
    return IS.InstanSeg("brightfield_nuclei", **wrapper_options)
def apply_model(model, img):
    """
    Run nuclei segmentation on one image and return a single 2D numpy array.

    Parameters
    ----------
    model
        Object providing `small_image_threshold`, `eval_small_image()` and
        `eval_medium_image()` (e.g. what create_official_model() returns).
        NB: the provided 'model' is already associated with a particular device.
    img
        np.array, already normalized by the caller. A 2D (gray) image is
        replicated into three identical channels; any other shape is passed
        on unchanged.

    Returns
    -------
    numpy array
        `masks[0, -1, :, :]` of the tensor returned by the model.
    """
    # turn gray to RGB image (three identical channels, channel axis first)
    if img.ndim == 2:
        img = np.stack((img, img, img))

    # NB: 'img' should be turned to torch tensor, but the eval() methods do "tensorify" themselves too...

    # explicitly don't re-scale the input image
    img_pixel_size = 1.0

    # code below taken (and slightly adapted) from the original repo's file inference_class.py:
    # the total element count decides between the small-image and the medium-image path
    num_pixels = int(np.prod(img.shape))
    if num_pixels < model.small_image_threshold:
        evaluate = model.eval_small_image
    else:
        evaluate = model.eval_medium_image

    # normalise=False because the caller has normalized the image already
    masks = evaluate(image=img,
                     normalise=False,
                     pixel_size=img_pixel_size,
                     target="nuclei",
                     return_image_tensor=False)

    # NB: always the first image on the list (of length 1) of created images
    # NB: always the last channel (either the one-and-only, or the latter if two are available)
    # .cpu() is required before .numpy() whenever the result lives on a CUDA device
    return masks.detach().cpu().numpy()[0, -1, :, :]
def _as_instanseg_samples(pairs):
    """Turn (gray image, nucleus mask) pairs into the sample dicts passed to InstanSeg training."""
    return [
        {
            'nucleus_masks': mask,
            'image': np.array([gray, gray, gray], dtype=gray.dtype),  # gray -> RGB
            'pixel_size': 1.0,
        }
        for gray, mask in pairs
    ]


def train_model_from_scratch(new_model_name:str = "PC3.instanseg.model", resume_model_name:str = None, epochs = 3):
    """
    Train an InstanSeg nuclei model on own data and store it under LOCAL_FOLDER_WITH_MODELS.

    Parameters
    ----------
    new_model_name
        Passed on as 'experiment_str', i.e. the name under which the training is saved.
    resume_model_name
        When given (non-empty), training is hot-started from this model folder
        inside LOCAL_FOLDER_WITH_MODELS; otherwise a fresh model is trained.
    epochs
        Number of training epochs.
        NB: the default is for testing now; increase it if you want a serious training.

    NOTE(review): `trainDataLoader` and `testDataLoader` are expected to be created
    by the (elided) data-loading code below; each must iterate over
    (gray image, mask) pairs of numpy arrays and expose `.img_files`.
    """
    #.... here some loader of images and their instance-segmentation masks; produces technically a list of pairs of numpy arrays

    print(f"Will use: {len(trainDataLoader.img_files)} and {len(testDataLoader.img_files)} train and test files, with batch size {batch_size}.")

    Segmentation_Dataset_Train = _as_instanseg_samples(trainDataLoader)
    Segmentation_Dataset_Validation = _as_instanseg_samples(testDataLoader)

    training_params = {
        'segmentation_dataset' : {'Train':Segmentation_Dataset_Train, 'Validation':Segmentation_Dataset_Validation},
        'source_dataset'       : "all",      # makes it not to look for any downloadable dataset
        'target_segmentation'  : "N",        # implies that the 'nucleus_masks' must be present
        'output_path'          : LOCAL_FOLDER_WITH_MODELS,
        'experiment_str'       : new_model_name,
        # None (the default) requests no spatial transform; a value of 1.0 would
        # instead ask for a model trained at 1 micron per pixel
        'requested_pixel_size' : None,
        'norm'                 : None,       # no intensity transform (I hope)
        'augmentation_type'    : "minimal",  # TODO, how to disable this totally?
        'batch_size'           : batch_size,
        'num_epochs'           : epochs,
    }

    if resume_model_name:
        # continue from an already saved model instead of starting from scratch
        training_params['model_path'] = LOCAL_FOLDER_WITH_MODELS
        training_params['model_folder'] = resume_model_name
        training_params['hotstart_training'] = 0

    # NB: instanseg_training() will re-autodetect the device (even if we provide one)
    instanseg_training(**training_params)

    # further interesting 'training_params' keys:
    #   augmentation_type=brightfield_only  # help says: 'minimal' or 'heavy' or 'brightfield_only' (no augmentation??)
    #   dim_in=3                            # try later with =1
    #   dataset='segmentation'
    #   device=device(type='cpu')
    #   target_segmentation='N'
    #   batch_size=3
    #   num_epochs=500
    #   length_of_epoch=1000                # (Number of samples per epoch -- probably set impossibly high??)
    #   lr=0.001
    #   -s (--save=True)                    # saves model every time a better one is evaluated
    #   -norm None (--norm)                 # default="BATCH", type=str, help = "Norm layer to use: None, INSTANCE, INSTANCE_INVARIANT, BATCH"
def load_model(model_name):
    """
    Load a model stored under LOCAL_FOLDER_WITH_MODELS onto the module-level `device`.

    Returns a pair: the model as loaded by InstanSeg's own loader, and an
    IS.InstanSeg object constructed around that very model.
    """
    loaded = original_load_model(model_name, path=LOCAL_FOLDER_WITH_MODELS, device=device)
    network, network_info = loaded

    # the loaded model ain't the same in terms of python type(!) as the one obtained
    # with create_official_model(); the latter is an IS.InstanSeg object and has,
    # e.g., some extra attribs -- 'pixel_size' is an example of one needed for the IS.eval()
    network.pixel_size = network_info['pixel_size']

    # trying to return an IS.InstanSeg object wrapped around the loaded model... ain't working well
    wrapped = IS.InstanSeg(model_type=network, image_reader="skimage.io", verbosity=1, device=device)
    return (network, wrapped)
Author
Hi, just noticed the:
'requested_pixel_size' : 1.0, # no spatial transform (I hope)
To be clear, that means you want to train a model at 1 micron per pixel. If you want no spatial transform, you should leave that parameter at its default value (None).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
With this code, I can happily train InstanSeg on my own data (which I have scaled and normalized my way), and it saves the model (and a bunch of telemetry files) into my
`LOCAL_FOLDER_WITH_MODELS`. I believe I can even load the created/trained models back, but I don't know how to run inference with them. My input to the inference would be an already scaled and normalized NumPy array (or Torch tensor, whatever is needed).
In fact, I can run the inference on "official" models using the example
`inference_class`, but this isn't working with my models loaded from the local drive. I have noticed I could export my models into a BioImage Zoo compatible format. Should I do this, and would that be a good input to my
`apply_model()` function?