-
-
Save travishsu/6efa5c9fb92ece37b4748036026342f6 to your computer and use it in GitHub Desktop.
| import os | |
| import json | |
| import subprocess | |
| import numpy as np | |
| import pandas as pd | |
| from skimage.measure import find_contours | |
class CocoDatasetHandler:
    """Convert a COCO instance-annotation file into labelme records.

    Attributes:
        annotations: One row per COCO annotation, joined with its image and
            category columns, plus a ``shapes`` column holding the list of
            polygons computed for that annotation.
        images: The COCO ``images`` table indexed by ``file_name``.
        labelme: Mapping of image file name to labelme record; filled in by
            ``coco2labelme``.
        imgpath: Directory the image files are copied from by
            ``save_labelme``.
    """

    def __init__(self, jsonpath, imgpath):
        """Load the COCO JSON and pre-compute the polygons of every annotation.

        Args:
            jsonpath: Path of the COCO annotation JSON. It must provide the
                ``images``, ``annotations`` and ``categories`` lists.
            imgpath: Directory that holds the image files.
        """
        with open(jsonpath, 'r') as jsonfile:
            ann = json.load(jsonfile)

        images = pd.DataFrame.from_dict(ann['images']).set_index('id')
        annotations = pd.DataFrame.from_dict(ann['annotations']).set_index('id')
        categories = pd.DataFrame.from_dict(ann['categories']).set_index('id')

        # Both merges use pandas' default inner join, so an annotation whose
        # image_id or category_id has no match is dropped.
        annotations = annotations.merge(images, left_on='image_id', right_index=True)
        annotations = annotations.merge(categories, left_on='category_id', right_index=True)
        annotations = annotations.assign(
            shapes=annotations.apply(self.coco2shape, axis=1))

        self.annotations = annotations
        self.labelme = {}
        self.imgpath = imgpath
        # Indexed by file name so coco2labelme can look up height/width.
        self.images = pd.DataFrame.from_dict(ann['images']).set_index('file_name')

    def coco2shape(self, row):
        """Return the polygons of one annotation row.

        Rows with ``iscrowd == 1`` are decoded as RLE masks; rows with
        ``iscrowd == 0`` are read as polygon lists.

        Args:
            row: Annotation row exposing ``iscrowd`` and ``segmentation``.

        Returns:
            A list of polygons, each a list of ``[x, y]`` integer points.

        Raises:
            ValueError: If ``iscrowd`` is neither 0 nor 1.
        """
        if row.iscrowd == 1:
            return self.rle2shape(row)
        if row.iscrowd == 0:
            return self.polygon2shape(row)
        # Without this the function used to fail with an UnboundLocalError.
        raise ValueError(f"unsupported iscrowd value: {row.iscrowd!r}")

    def rle2shape(self, row):
        """Trace the outline(s) of an RLE-encoded mask as polygons.

        Args:
            row: Annotation row whose ``segmentation`` is a mapping with the
                run lengths under ``counts`` and the mask shape under
                ``size``.

        Returns:
            A list of polygons, each a list of ``[x, y]`` integer points.
        """
        segmentation = row['segmentation']
        mask = self._rle_decode(segmentation['counts'], segmentation['size'])

        # Surround the mask with a one-pixel border of zeros so that regions
        # touching the image edge still yield closed contours.
        padded_mask = np.zeros(
            (mask.shape[0] + 2, mask.shape[1] + 2),
            dtype=np.uint8,
        )
        padded_mask[1:-1, 1:-1] = mask
        contours = find_contours(padded_mask, 0.5)

        # find_contours yields (row, column) pairs: flip them to (x, y) and
        # subtract the padding offset. A border coordinate of -0.5 truncates
        # to 0, so every point stays inside the image.
        return [
            [[int(col - 1), int(line - 1)] for line, col in contour]
            for contour in contours
        ]

    def _rle_decode(self, rle, shape):
        """Expand run lengths into a 2-D boolean mask.

        The runs alternate between zeros and ones, starting with zeros, and
        fill the mask column by column. Cells not covered by any run stay
        False.

        Args:
            rle: Sequence of non-negative integer run lengths.
            shape: Two-item sequence; the returned mask has shape
                ``(shape[0], shape[1])``.

        Returns:
            A boolean ``numpy`` array of shape ``(shape[0], shape[1])``.

        Raises:
            TypeError: If ``rle`` is a string or bytes object (presumably a
                compressed RLE, which this decoder cannot read).
            ValueError: If a run is negative or the runs exceed the mask size.
        """
        if isinstance(rle, (str, bytes)):
            raise TypeError(
                "RLE counts must be a sequence of integers; "
                "string-encoded (compressed) RLE is not supported")

        total = shape[0] * shape[1]
        # Plain ``bool``: the ``np.bool`` alias was removed from NumPy.
        flat = np.zeros(total, dtype=bool)

        start = 0  # running offset instead of re-summing rle[:idx] each time
        for idx, run in enumerate(rle):
            end = start + run
            if run < 0 or end > total:
                raise ValueError(
                    "shape: {} s {} e {} r {}".format(shape, start, end, run))
            if idx % 2 == 1:
                flat[start:end] = True
            start = end

        # The flat buffer is column-major: reshape and transpose.
        return flat.reshape([shape[1], shape[0]]).T

    def polygon2shape(self, row):
        """Convert flat polygon coordinate lists into lists of points.

        Args:
            row: Annotation row whose ``segmentation`` is a list of flat
                coordinate lists ``[x1, y1, x2, y2, ...]``.

        Returns:
            A nested list shaped (n_polygons, n_points, 2) with coordinates
            truncated to ``int``. A trailing unpaired value is ignored.
        """
        return [
            [[int(x), int(y)] for x, y in zip(points[0::2], points[1::2])]
            for points in row.segmentation
        ]

    def coco2labelme(self):
        """Build one labelme record per annotated image into ``self.labelme``.

        Every polygon becomes its own labelme shape; polygons that come from
        the same annotation share a ``group_id``. Images without any
        annotation get no record, and a record that already exists for a
        file name is left untouched.
        """
        fillColor = [255, 0, 0, 128]
        lineColor = [0, 255, 0, 128]
        shape_template = {
            'line_color': None,
            'fill_color': None,
            'shape_type': "polygon",
        }

        for filename, df in self.annotations.groupby('file_name'):
            image_info = self.images.loc[filename]
            record = {
                'imageData': None,
                'fillColor': fillColor,
                'lineColor': lineColor,
                'imagePath': filename,
                'imageHeight': int(image_info.height),
                'imageWidth': int(image_info.width),
                'shapes': [],
            }
            for inst_idx, (_, row) in enumerate(df.iterrows()):
                for polygon in row.shapes:
                    record['shapes'].append({
                        **shape_template,
                        'label': row['name'],
                        'group_id': inst_idx,
                        'points': polygon,
                    })
            if filename not in self.labelme:
                self.labelme[filename] = record

    def save_labelme(self, file_names, dirpath, save_json_only=False):
        """Write the labelme JSON (and optionally the image) for each file.

        Args:
            file_names: Iterable of keys of ``self.labelme`` to export.
            dirpath: Output directory; it is created and must not exist yet.
            save_json_only: When True, skip copying the image files.

        Raises:
            ValueError: If ``dirpath`` already exists.
            KeyError: If a name in ``file_names`` has no labelme record.
        """
        if os.path.exists(dirpath):
            raise ValueError(f"{dirpath} has existed")
        os.makedirs(dirpath)

        for file in file_names:
            stem = os.path.basename(os.path.splitext(file)[0])
            with open(os.path.join(dirpath, stem + '.json'), 'w') as jsonfile:
                json.dump(self.labelme[file], jsonfile, ensure_ascii=True, indent=2)
            if not save_json_only:
                # NOTE: relies on a POSIX ``cp`` executable; its exit status
                # is not checked, so a missing image does not stop the export.
                subprocess.call(['cp', os.path.join(self.imgpath, file), dirpath])
# Convert the COCO train2014 instance annotations into labelme JSON files.
# The image files are copied next to the JSON output (save_json_only keeps
# its default of False); save_labelme raises ValueError when the output
# directory already exists.
ds = CocoDatasetHandler(
    jsonpath='cocodataset/annotations/instances_train2014.json',
    imgpath='cocodataset/train2014/',
)
ds.coco2labelme()
ds.save_labelme(
    file_names=ds.labelme.keys(),
    dirpath='cocodataset/labelme/train2014',
)
This script might call polygon2shape(L68) since "iscrowd" is 0.
Most of my images are crowded, so should I set it to 1 in order to get the exact number of masks?
Hi @manaswakchaure,
I found there are multiple lists nested in the value of segmentation so there'll be multiple converted masks for a single instance, and the converted masks will have the same group_id.
Besides, using iscrowd=0 is correct if the value of segmentation is in polygon format instead of RLE format.
Dear sir,
Thank you so much!
I got it. I made some modifications to get those multiple masks as one single instance under one group_id, as needed, and verified that it works!
Thank you so much for your time.
不知道發這邊好不好
想請問作者有考慮新增png to json嗎?
也就是mask.png 轉成 coco.json (png2coco)
我原本參考這位作者的程式碼執行,他的資料集確實可以跑
但用我的資料去跑,產生COCO裡面的segmentation的點的座標,有些是負值,導致無法開起來
https://github.com/chrise96/image-to-coco-json-converter
這是我的資料集檔案連結
https://drive.google.com/drive/folders/1butmjjGTgMIEr6bq1nQ3ejjm0M7oN_YR?usp=share_link
謝謝你
你好,我最後查出原因,只有把照片改成JPG即可
但有個極為困難的點,如果圖形是甜甜圈那種形狀,中間需要挖空
似乎coco json無法表示
用實際例子說明,假設有個正方形的農田,正中間有個農舍房子
因此農田polygon要正方形減去正中間農舍
但coco json的polygon的點,是輪廓組成,因此無法形成
不曉得你這邊有無辦法解決重疊的地方把它去除
Hi @stphtan94117,
我想 png2coco 可能不適合放在這個 coco2labelme 底下。
那位作者用的是多個 polygon 放在同一個 segmentation 且設定 iscrowd 為 0,
但如果有沒辦法用 polygon 表示的 instance,我想你可以考慮用將同一個 instance 的 mask 轉成 RLE format,且讓 iscrowd 設為 1
References


@travishsu Thank you, sir, for your reply.
I have attached the files here https://drive.google.com/drive/folders/1cvPGxPGLCb-6fbDGVEHPDVX2bxPEvx3m?usp=sharing
Thank you!