Source code for assessment.annotation

"""
---------------------------------------------------------------------------
Created on Fri Feb  4 11:42:52 2023

----------------------------------------------------------------------------

**Title:**       ValidPath Toolbox - Annotation File Generation Module

**Description:**  This is the Annotation File Generator module for the ValidPath toolbox. It includes the Annotation_Generator class and several methods
              
**Classes:**      Annotation_Generator
              

**Methods:**     There are three methods in the Annotation File Generation module as follows:

                    •	Annotation_Generator.generate_map_file(input_DIR, output_DIR, file_Name, tag_name)
                    
                    •	Annotation_Generator.create_xml(input_DIR, file_path, path_size, save_xml_path)
                    
                    •	Annotation_Generator.make_region(x, y, id, txt, path_size, Regions)

---------------------------------------------------------------------------
Author: SeyedM.MousaviKahaki (seyed.kahaki@fda.hhs.gov)
Version ='1.0'
---------------------------------------------------------------------------
"""


import pandas as pd
import os
from lxml import etree as et
from os import walk

class Annotation_Generator:
    """
    Build XML annotation files (Aperio ImageScope layout) from image patches.

    Typical use is two steps: ``generate_map_file`` scans a directory of patch
    files and writes a CSV map file; ``create_xml`` reads that map file and
    writes one XML annotation file per whole slide image (WSI).
    """

    def __init__(self):
        pass

    def make_region(self, x, y, id, txt, path_size, Regions):
        """
        Append one square ``Region`` element to an XML ``Regions`` element.

        The region is described by four ``Vertex`` elements forming a square
        whose first corner is ``(x, y)`` and whose side is ``path_size``.

        :Parameters:
            x : number
                X coordinate of the first (top-left) corner
            y : number
                Y coordinate of the first (top-left) corner
            id : integer
                Value written to the region's ``Id`` attribute
            txt : string
                Value written to the region's ``Text`` attribute
            path_size : number
                Patch size, i.e. the side length of the square
            Regions : object
                The parent XML ``Regions`` element the region is appended to

        :Returns:
            The newly created ``Region`` XML element
        """
        # NOTE: ``id`` shadows the builtin but is kept because it is part of
        # the public signature (callers may pass it by keyword).
        Region = et.SubElement(Regions, 'Region')
        Region.set("Type", "1")  # type "1" : means it is rect
        Region.set("Id", str(id))
        Region.set("Text", str(txt))

        Vertices = et.SubElement(Region, 'Vertices')
        # Corners are emitted in order around the square.
        corners = (
            (x, y),                          # top left
            (x + path_size, y),              # top right
            (x + path_size, y + path_size),  # bottom right
            (x, y + path_size),              # bottom left
        )
        for corner_x, corner_y in corners:
            Vertex = et.SubElement(Vertices, 'Vertex')
            Vertex.set('X', str(corner_x))
            Vertex.set('Y', str(corner_y))
            Vertex.set('Z', str(0))
        return Region

    def create_xml(self, input_DIR, file_path, path_size, save_xml_path):
        """
        Read a map file and write one XML annotation file per WSI.

        The map file is the CSV produced by ``generate_map_file``; it must
        contain the columns ``WSI``, ``N_ANN``, ``X``, ``Y`` and ``TEXT``.
        For every distinct ``WSI`` value a file ``<WSI>.xml`` is written to
        ``save_xml_path``. Its ``Annotation`` element is named after the
        ``N_ANN`` value of the first row of that WSI, and it holds one region
        per row, numbered from 1 in map-file order.

        :Parameters:
            input_DIR : string
                the path to the input directory of the mapping file
            file_path : string
                map file name (csv)
            path_size : integer
                Size of image patch
            save_xml_path : string
                output directory (must already exist)

        :Returns:
            list of the XML file paths that were written
        """
        # os.path.join (not "+") so input_DIR works with or without a
        # trailing path separator.
        map_file = os.path.join(input_DIR, file_path)
        csv_file = pd.read_csv(map_file, index_col='WSI')

        saved_paths = []
        # Grouping on the WSI index visits the slides in sorted order and
        # keeps each slide's rows in file order. Everything below is accessed
        # by position, so it works whether the WSI names are text or numbers.
        for wsi_name, wsi_rows in csv_file.groupby(level=0, sort=True):
            root = et.Element('Annotations')
            object_elem = et.SubElement(root, 'Annotation')
            object_elem.set("Name", str(wsi_rows['N_ANN'].iloc[0]))
            Regions = et.SubElement(object_elem, 'Regions')
            print(wsi_name)

            patches = zip(wsi_rows['X'], wsi_rows['Y'], wsi_rows['TEXT'])
            for region_id, (x, y, txt) in enumerate(patches, start=1):
                self.make_region(x, y, region_id, txt, path_size, Regions)

            out = et.tostring(root, pretty_print=True, encoding='utf8')
            savepath = os.path.join(save_xml_path, f'{wsi_name}.xml')
            with open(savepath, 'wb') as fd:
                fd.write(out)
            saved_paths.append(savepath)
        return saved_paths

    def generate_map_file(self, input_DIR, output_DIR, file_Name, tag_name):
        """
        Scan a directory of image patches and write the CSV map file.

        Only files directly inside ``input_DIR`` are considered (no
        recursion). Each file name is split on ``"_"``: field 1 becomes
        ``WSI``, fields 0 and 1 joined by ``"_"`` become ``TEXT``, field 3
        becomes ``X`` and field 5 becomes ``Y``. File names with fewer than
        six fields are skipped. Every row gets ``tag_name`` as ``N_ANN``.

        :Parameters:
            input_DIR : string
                the path to the input directory of image patches
            output_DIR : str
                the path to the output directory to save the map file
            file_Name : string
                map file name (csv)
            tag_name : string
                Tag name stored in the ``N_ANN`` column

        :Returns:
            the map file content as a pandas DataFrame (also written as CSV)
        """
        print(input_DIR)
        # First item yielded by walk() is the top-level directory; the default
        # covers a missing directory, for which walk() yields nothing.
        _, _, filenames = next(walk(input_DIR), (input_DIR, [], []))

        records = []
        # Sorted so the map file does not depend on filesystem listing order.
        for filename in sorted(filenames):
            fields = filename.split("_")
            if len(fields) < 6:
                # Stray files (e.g. OS metadata) must not abort the whole run.
                print(f"Skipping '{filename}': unexpected file name format")
                continue
            # NOTE(review): field 5 is stored as-is; if it is the last field
            # of the name it still carries the file extension — confirm the
            # patch naming scheme.
            records.append({
                'WSI': fields[1],
                'TEXT': fields[0] + "_" + fields[1],
                'X': fields[3],
                'Y': fields[5],
                'N_ANN': tag_name,
            })

        # Explicit columns so an empty directory still yields a CSV header.
        Datadf = pd.DataFrame(
            records, columns=['WSI', 'TEXT', 'X', 'Y', 'N_ANN'])
        # os.path.join (not "+") so output_DIR works with or without a
        # trailing path separator.
        Datadf.to_csv(os.path.join(output_DIR, file_Name), encoding='utf-8')
        return Datadf