__author__ = 'hsfang, mhgou, cxwang'
# Interface for accessing the GraspNet-1Billion dataset.
# Description and part of the codes modified from MSCOCO api
# GraspNet is an open project for general object grasping that is continuously enriched.
# Currently we release GraspNet-1Billion, a large-scale benchmark for general object grasping,
# as well as other related areas (e.g. 6D pose estimation, unseen object segmentation, etc.).
# graspnetapi is a Python API that assists in loading, parsing and visualizing the
# annotations in GraspNet. Please visit https://graspnet.net/ for more information on GraspNet,
# including for the data, paper, and tutorials. The exact format of the annotations
# is also described on the GraspNet website. For example usage of the graspnetapi
# please see graspnetapi_demo.ipynb. In addition to this API, please download both
# the GraspNet images and annotations in order to run the demo.
# An alternative to using the API is to load the annotations directly
# into a Python dictionary.
# Using the API provides additional utility functions. Note that this API
# supports both *grasping* and *6d pose* annotations. In the case of
# 6d poses not all functions are defined (e.g. collisions are undefined).
# The following API functions are defined:
# GraspNet - GraspNet api class that loads GraspNet annotation file and prepare data structures.
# checkDataCompleteness- Check the file completeness of the dataset.
# getSceneIds - Get scene ids that satisfy given filter conditions.
# getObjIds - Get obj ids that satisfy given filter conditions.
# getDataIds - Get data ids that satisfy given filter conditions.
# loadBGR - Load image in BGR format.
# loadRGB - Load image in RGB format.
# loadDepth - Load depth image.
# loadMask - Load the segmentation masks.
# loadSceneModels - Load object models in a scene.
# loadScenePointCloud - Load point cloud constructed by the depth and color image.
# loadWorkSpace - Load the workspace bounding box.
# loadGraspLabels - Load grasp labels with the specified object ids.
# loadObjModels - Load object 3d mesh model with the specified object ids.
# loadObjTrimesh - Load object 3d mesh in Trimesh format.
# loadCollisionLabels - Load collision labels with the specified scene ids.
# loadGrasp - Load grasp labels with the specified scene and annotation id.
# loadData - Load data path with the specified data ids.
# showObjGrasp - Save visualization of the grasp pose of specified object ids.
# showSceneGrasp - Save visualization of the grasp pose of specified scene ids.
# show6DPose - Save visualization of the 6d pose of specified scene ids, project obj models onto pointcloud
# Throughout the API "ann"=annotation, "obj"=object, and "img"=image.
# GraspNet Toolbox. version 1.0
# Data, paper, and tutorials available at: https://graspnet.net/
# Code written by Hao-Shu Fang, Minghao Gou and Chenxi Wang, 2020.
# Licensed under the non-commercial CC 4.0 license [see https://graspnet.net/about]
import os
import numpy as np
from tqdm import tqdm
import open3d as o3d
import cv2
import trimesh
from .grasp import Grasp, GraspGroup, RectGrasp, RectGraspGroup, RECT_GRASP_ARRAY_LEN
from .utils.utils import transform_points, parse_posevector
from .utils.xmlhandler import xmlReader
TOTAL_SCENE_NUM = 190
GRASP_HEIGHT = 0.02
def _isArrayLike(obj):
return hasattr(obj, '__iter__') and hasattr(obj, '__len__')
class GraspNet():
    def __init__(self, root, camera='kinect', split='train'):
        '''
        graspnetAPI main class.

        **input**:

        - camera: string of type of camera: "kinect" or "realsense"

        - split: string of type of split of dataset: "all", "train", "test", "test_seen", "test_similar" or "test_novel"
        '''
        assert camera in ['kinect', 'realsense'], 'camera should be kinect or realsense'
        assert split in ['all', 'train', 'test', 'test_seen', 'test_similar', 'test_novel'], 'split should be all/train/test/test_seen/test_similar/test_novel'
        self.root = root
        self.camera = camera
        self.split = split
        self.collisionLabels = {}
        # Scene id range covered by each dataset split.
        split_scenes = {
            'all': range(TOTAL_SCENE_NUM),
            'train': range(100),
            'test': range(100, 190),
            'test_seen': range(100, 130),
            'test_similar': range(130, 160),
            'test_novel': range(160, 190),
        }
        self.sceneIds = list(split_scenes[split])
        self.rgbPath = []
        self.depthPath = []
        self.segLabelPath = []
        self.metaPath = []
        self.rectLabelPath = []
        self.sceneName = []
        self.annId = []
        # Pre-build the per-frame file paths: 256 annotations per scene.
        for i in tqdm(self.sceneIds, desc='Loading data path...'):
            scene_name = 'scene_' + str(i).zfill(4)
            camera_dir = os.path.join(root, 'scenes', scene_name, camera)
            for img_num in range(256):
                frame = str(img_num).zfill(4)
                self.rgbPath.append(os.path.join(camera_dir, 'rgb', frame + '.png'))
                self.depthPath.append(os.path.join(camera_dir, 'depth', frame + '.png'))
                self.segLabelPath.append(os.path.join(camera_dir, 'label', frame + '.png'))
                self.metaPath.append(os.path.join(camera_dir, 'meta', frame + '.mat'))
                self.rectLabelPath.append(os.path.join(camera_dir, 'rect', frame + '.npy'))
                self.sceneName.append(scene_name)
                self.annId.append(img_num)
        # Cache the object ids present in this split's scenes (reads dataset files).
        self.objIds = self.getObjIds(self.sceneIds)
    def __len__(self):
        '''
        **Output:**

        - int of the number of data frames in the split (256 annotations per scene).
        '''
        return len(self.depthPath)
[docs] def checkDataCompleteness(self):
'''
Check whether the dataset files are complete.
**Output:**
- bool, True for complete, False for not complete.
'''
error_flag = False
for obj_id in tqdm(range(88), 'Checking Models'):
if not os.path.exists(os.path.join(self.root, 'models','%03d' % obj_id, 'nontextured.ply')):
error_flag = True
print('No nontextured.ply For Object {}'.format(obj_id))
if not os.path.exists(os.path.join(self.root, 'models','%03d' % obj_id, 'textured.sdf')):
error_flag = True
print('No textured.sdf For Object {}'.format(obj_id))
if not os.path.exists(os.path.join(self.root, 'models','%03d' % obj_id, 'textured.obj')):
error_flag = True
print('No textured.obj For Object {}'.format(obj_id))
for obj_id in tqdm(range(88), 'Checking Grasp Labels'):
if not os.path.exists(os.path.join(self.root, 'grasp_label', '%03d_labels.npz' % obj_id)):
error_flag = True
print('No Grasp Label For Object {}'.format(obj_id))
for sceneId in tqdm(self.sceneIds, 'Checking Collosion Labels'):
if not os.path.exists(os.path.join(self.root, 'collision_label', 'scene_%04d' % sceneId, 'collision_labels.npz')):
error_flag = True
print('No Collision Labels For Scene {}'.format(sceneId))
for sceneId in tqdm(self.sceneIds, 'Checking Scene Datas'):
scene_dir = os.path.join(self.root, 'scenes', 'scene_%04d' % sceneId)
if not os.path.exists(os.path.join(scene_dir,'object_id_list.txt')):
error_flag = True
print('No Object Id List For Scene {}'.format(sceneId))
if not os.path.exists(os.path.join(scene_dir,'rs_wrt_kn.npy')):
error_flag = True
print('No rs_wrt_kn.npy For Scene {}'.format(sceneId))
for camera in [self.camera]:
camera_dir = os.path.join(scene_dir, camera)
if not os.path.exists(os.path.join(camera_dir,'cam0_wrt_table.npy')):
error_flag = True
print('No cam0_wrt_table.npy For Scene {}, Camera:{}'.format(sceneId, camera))
if not os.path.exists(os.path.join(camera_dir,'camera_poses.npy')):
error_flag = True
print('No camera_poses.npy For Scene {}, Camera:{}'.format(sceneId, camera))
if not os.path.exists(os.path.join(camera_dir,'camK.npy')):
error_flag = True
print('No camK.npy For Scene {}, Camera:{}'.format(sceneId, camera))
for annId in range(256):
if not os.path.exists(os.path.join(camera_dir,'rgb','%04d.png' % annId)):
error_flag = True
print('No RGB Image For Scene {}, Camera:{}, annotion:{}'.format(sceneId, camera, annId))
if not os.path.exists(os.path.join(camera_dir,'depth','%04d.png' % annId)):
error_flag = True
print('No Depth Image For Scene {}, Camera:{}, annotion:{}'.format(sceneId, camera, annId))
if not os.path.exists(os.path.join(camera_dir,'label','%04d.png' % annId)):
error_flag = True
print('No Mask Label image For Scene {}, Camera:{}, annotion:{}'.format(sceneId, camera, annId))
if not os.path.exists(os.path.join(camera_dir,'meta','%04d.mat' % annId)):
error_flag = True
print('No Meta Data For Scene {}, Camera:{}, annotion:{}'.format(sceneId, camera, annId))
if not os.path.exists(os.path.join(camera_dir,'annotations','%04d.xml' % annId)):
error_flag = True
print('No Annotations For Scene {}, Camera:{}, annotion:{}'.format(sceneId, camera, annId))
if not os.path.exists(os.path.join(camera_dir,'rect','%04d.npy' % annId)):
error_flag = True
print('No Rectangle Labels For Scene {}, Camera:{}, annotion:{}'.format(sceneId, camera, annId))
return not error_flag
[docs] def getSceneIds(self, objIds=None):
'''
**Input:**
- objIds: int or list of int of the object ids.
**Output:**
- a list of int of the scene ids that contains **all** the objects.
'''
if objIds is None:
return self.sceneIds
assert _isArrayLike(objIds) or isinstance(objIds, int), 'objIds must be integer or a list/numpy array of integers'
objIds = objIds if _isArrayLike(objIds) else [objIds]
sceneIds = []
for i in self.sceneIds:
f = open(os.path.join(self.root, 'scenes', 'scene_' + str(i).zfill(4), 'object_id_list.txt'))
idxs = [int(line.strip()) for line in f.readlines()]
check = all(item in idxs for item in objIds)
if check:
sceneIds.append(i)
return sceneIds
[docs] def getObjIds(self, sceneIds=None):
'''
**Input:**
- sceneIds: int or list of int of the scene ids.
**Output:**
- a list of int of the object ids in the given scenes.
'''
# get object ids in the given scenes
if sceneIds is None:
return self.objIds
assert _isArrayLike(sceneIds) or isinstance(sceneIds, int), 'sceneIds must be an integer or a list/numpy array of integers'
sceneIds = sceneIds if _isArrayLike(sceneIds) else [sceneIds]
objIds = []
for i in sceneIds:
f = open(os.path.join(self.root, 'scenes', 'scene_' + str(i).zfill(4), 'object_id_list.txt'))
idxs = [int(line.strip()) for line in f.readlines()]
objIds = list(set(objIds+idxs))
return objIds
[docs] def getDataIds(self, sceneIds=None):
'''
**Input:**
- sceneIds:int or list of int of the scenes ids.
**Output:**
- a list of int of the data ids. Data could be accessed by calling self.loadData(ids).
'''
# get index for datapath that contains the given scenes
if sceneIds is None:
return list(range(len(self.sceneName)))
ids = []
indexPosList = []
for i in sceneIds:
indexPosList += [ j for j in range(0,len(self.sceneName),256) if self.sceneName[j] == 'scene_'+str(i).zfill(4) ]
for idx in indexPosList:
ids += list(range(idx, idx+256))
return ids
[docs] def loadGraspLabels(self, objIds=None):
'''
**Input:**
- objIds: int or list of int of the object ids.
**Output:**
- a dict of grasplabels of each object.
'''
# load object-level grasp labels of the given obj ids
objIds = self.objIds if objIds is None else objIds
assert _isArrayLike(objIds) or isinstance(objIds, int), 'objIds must be an integer or a list/numpy array of integers'
objIds = objIds if _isArrayLike(objIds) else [objIds]
graspLabels = {}
for i in tqdm(objIds, desc='Loading grasping labels...'):
file = np.load(os.path.join(self.root, 'grasp_label', '{}_labels.npz'.format(str(i).zfill(3))))
graspLabels[i] = (file['points'].astype(np.float32), file['offsets'].astype(np.float32), file['scores'].astype(np.float32))
return graspLabels
[docs] def loadObjModels(self, objIds=None):
'''
**Function:**
- load object 3D models of the given obj ids
**Input:**
- objIDs: int or list of int of the object ids
**Output:**
- a list of open3d.geometry.PointCloud of the models
'''
objIds = self.objIds if objIds is None else objIds
assert _isArrayLike(objIds) or isinstance(objIds, int), 'objIds must be an integer or a list/numpy array of integers'
objIds = objIds if _isArrayLike(objIds) else [objIds]
models = []
for i in tqdm(objIds, desc='Loading objects...'):
plyfile = os.path.join(self.root, 'models','%03d' % i, 'nontextured.ply')
models.append(o3d.io.read_point_cloud(plyfile))
return models
[docs] def loadObjTrimesh(self, objIds=None):
'''
**Function:**
- load object 3D trimesh of the given obj ids
**Input:**
- objIDs: int or list of int of the object ids
**Output:**
- a list of trimesh.Trimesh of the models
'''
objIds = self.objIds if objIds is None else objIds
assert _isArrayLike(objIds) or isinstance(objIds, int), 'objIds must be an integer or a list/numpy array of integers'
objIds = objIds if _isArrayLike(objIds) else [objIds]
models = []
for i in tqdm(objIds, desc='Loading objects...'):
plyfile = os.path.join(self.root, 'models','%03d' % i, 'nontextured.ply')
models.append(trimesh.load(plyfile))
return models
[docs] def loadCollisionLabels(self, sceneIds=None):
'''
**Input:**
- sceneIds: int or list of int of the scene ids.
**Output:**
- dict of the collision labels.
'''
sceneIds = self.sceneIds if sceneIds is None else sceneIds
assert _isArrayLike(sceneIds) or isinstance(sceneIds, int), 'sceneIds must be an integer or a list/numpy array of integers'
sceneIds = sceneIds if _isArrayLike(sceneIds) else [sceneIds]
collisionLabels = {}
for sid in tqdm(sceneIds, desc='Loading collision labels...'):
labels = np.load(os.path.join(self.root, 'collision_label','scene_'+str(sid).zfill(4), 'collision_labels.npz'))
collisionLabel = []
for j in range(len(labels)):
collisionLabel.append(labels['arr_{}'.format(j)])
collisionLabels['scene_'+str(sid).zfill(4)] = collisionLabel
return collisionLabels
[docs] def loadRGB(self, sceneId, camera, annId):
'''
**Input:**
- sceneId: int of the scene index.
- camera: string of type of camera, 'realsense' or 'kinect'
- annId: int of the annotation index.
**Output:**
- numpy array of the rgb in RGB order.
'''
return cv2.cvtColor(cv2.imread(os.path.join(self.root, 'scenes', 'scene_%04d' % sceneId, camera, 'rgb', '%04d.png' % annId)), cv2.COLOR_BGR2RGB)
[docs] def loadBGR(self, sceneId, camera, annId):
'''
**Input:**
- sceneId: int of the scene index.
- camera: string of type of camera, 'realsense' or 'kinect'
- annId: int of the annotation index.
**Output:**
- numpy array of the rgb in BGR order.
'''
return cv2.imread(os.path.join(self.root, 'scenes', 'scene_%04d' % sceneId, camera, 'rgb', '%04d.png' % annId))
[docs] def loadDepth(self, sceneId, camera, annId):
'''
**Input:**
- sceneId: int of the scene index.
- camera: string of type of camera, 'realsense' or 'kinect'
- annId: int of the annotation index.
**Output:**
- numpy array of the depth with dtype = np.uint16
'''
return cv2.imread(os.path.join(self.root, 'scenes', 'scene_%04d' % sceneId, camera, 'depth', '%04d.png' % annId), cv2.IMREAD_UNCHANGED)
[docs] def loadMask(self, sceneId, camera, annId):
'''
**Input:**
- sceneId: int of the scene index.
- camera: string of type of camera, 'realsense' or 'kinect'
- annId: int of the annotation index.
**Output:**
- numpy array of the mask with dtype = np.uint16
'''
return cv2.imread(os.path.join(self.root, 'scenes', 'scene_%04d' % sceneId, camera, 'label', '%04d.png' % annId), cv2.IMREAD_UNCHANGED)
[docs] def loadWorkSpace(self, sceneId, camera, annId):
'''
**Input:**
- sceneId: int of the scene index.
- camera: string of type of camera, 'realsense' or 'kinect'
- annId: int of the annotation index.
**Output:**
- tuple of the bounding box coordinates (x1, y1, x2, y2).
'''
mask = self.loadMask(sceneId, camera, annId)
maskx = np.any(mask, axis=0)
masky = np.any(mask, axis=1)
x1 = np.argmax(maskx)
y1 = np.argmax(masky)
x2 = len(maskx) - np.argmax(maskx[::-1])
y2 = len(masky) - np.argmax(masky[::-1])
return (x1, y1, x2, y2)
    def loadScenePointCloud(self, sceneId, camera, annId, align=False, format = 'open3d', use_workspace = False, use_mask = True, use_inpainting = False):
        '''
        Back-project one annotation's depth image into a colored point cloud.

        **Input:**

        - sceneId: int of the scene index.

        - camera: string of type of camera, 'realsense' or 'kinect'

        - annId: int of the annotation index.

        - align: bool of whether align to the table frame.

        - format: string of the returned type. 'open3d' or 'numpy'

        - use_workspace: bool of whether crop the point cloud in the work space.

        - use_mask: bool of whether crop the point cloud use mask(z>0), only open3d 0.9.0 is supported for False option.
                    Only turn to False if you know what you are doing.

        - use_inpainting: bool of whether inpaint the depth image for the missing information.

        **Output:**

        - open3d.geometry.PointCloud instance of the scene point cloud.

        - or tuple of numpy array of point locations and colors.
        '''
        colors = self.loadRGB(sceneId = sceneId, camera = camera, annId = annId).astype(np.float32) / 255.0
        depths = self.loadDepth(sceneId = sceneId, camera = camera, annId = annId)
        if use_inpainting:
            # Zero out implausibly small depths, then inpaint every near-zero
            # pixel from its neighbourhood (Navier-Stokes based method).
            fault_mask = depths < 200
            depths[fault_mask] = 0
            inpainting_mask = (np.abs(depths) < 10).astype(np.uint8)
            depths = cv2.inpaint(depths, inpainting_mask, 5, cv2.INPAINT_NS)
        intrinsics = np.load(os.path.join(self.root, 'scenes', 'scene_%04d' % sceneId, camera, 'camK.npy'))
        fx, fy = intrinsics[0,0], intrinsics[1,1]
        cx, cy = intrinsics[0,2], intrinsics[1,2]
        # Depth scale: stored values divided by s give meters — presumably
        # depth PNGs are in millimeters; verify against the dataset docs.
        s = 1000.0
        if align:
            # Compose annotation camera pose with the cam0-to-table transform.
            camera_poses = np.load(os.path.join(self.root, 'scenes', 'scene_%04d' % sceneId, camera, 'camera_poses.npy'))
            camera_pose = camera_poses[annId]
            align_mat = np.load(os.path.join(self.root, 'scenes', 'scene_%04d' % sceneId, camera, 'cam0_wrt_table.npy'))
            camera_pose = align_mat.dot(camera_pose)
        # Pinhole back-projection of every pixel.
        xmap, ymap = np.arange(colors.shape[1]), np.arange(colors.shape[0])
        xmap, ymap = np.meshgrid(xmap, ymap)
        points_z = depths / s
        points_x = (xmap - cx) / fx * points_z
        points_y = (ymap - cy) / fy * points_z
        if use_workspace:
            # Crop all channels to the mask-derived bounding box first.
            (x1, y1, x2, y2) = self.loadWorkSpace(sceneId, camera, annId)
            points_z = points_z[y1:y2,x1:x2]
            points_x = points_x[y1:y2,x1:x2]
            points_y = points_y[y1:y2,x1:x2]
            colors = colors[y1:y2,x1:x2]
        mask = (points_z > 0)
        points = np.stack([points_x, points_y, points_z], axis=-1)
        if use_mask:
            # Keep only pixels with valid (positive) depth.
            points = points[mask]
            colors = colors[mask]
        else:
            points = points.reshape((-1, 3))
            colors = colors.reshape((-1, 3))
        if align:
            points = transform_points(points, camera_pose)
        if format == 'open3d':
            cloud = o3d.geometry.PointCloud()
            cloud.points = o3d.utility.Vector3dVector(points)
            cloud.colors = o3d.utility.Vector3dVector(colors)
            return cloud
        elif format == 'numpy':
            return points, colors
        else:
            raise ValueError('Format must be either "open3d" or "numpy".')
[docs] def loadSceneModel(self, sceneId, camera = 'kinect', annId = 0, align = False):
'''
**Input:**
- sceneId: int of the scene index.
- camera: string of type of camera, 'realsense' or 'kinect'
- annId: int of the annotation index.
- align: bool of whether align to the table frame.
**Output:**
- open3d.geometry.PointCloud list of the scene models.
'''
if align:
camera_poses = np.load(os.path.join(self.root, 'scenes', 'scene_%04d' % sceneId, camera, 'camera_poses.npy'))
camera_pose = camera_poses[annId]
align_mat = np.load(os.path.join(self.root, 'scenes', 'scene_%04d' % sceneId, camera, 'cam0_wrt_table.npy'))
camera_pose = np.matmul(align_mat,camera_pose)
scene_reader = xmlReader(os.path.join(self.root, 'scenes', 'scene_%04d' % sceneId, camera, 'annotations', '%04d.xml'% annId))
posevectors = scene_reader.getposevectorlist()
obj_list = []
mat_list = []
model_list = []
pose_list = []
for posevector in posevectors:
obj_idx, pose = parse_posevector(posevector)
obj_list.append(obj_idx)
mat_list.append(pose)
for obj_idx, pose in zip(obj_list, mat_list):
plyfile = os.path.join(self.root, 'models', '%03d'%obj_idx, 'nontextured.ply')
model = o3d.io.read_point_cloud(plyfile)
points = np.array(model.points)
if align:
pose = np.dot(camera_pose, pose)
points = transform_points(points, pose)
model.points = o3d.utility.Vector3dVector(points)
model_list.append(model)
pose_list.append(pose)
return model_list
    def loadGrasp(self, sceneId, annId=0, format = '6d', camera='kinect', grasp_labels = None, collision_labels = None, fric_coef_thresh=0.4):
        '''
        **Input:**

        - sceneId: int of scene id.

        - annId: int of annotation id.

        - format: string of grasp format, '6d' or 'rect'.

        - camera: string of camera type, 'kinect' or 'realsense'.

        - grasp_labels: dict of grasp labels. Call self.loadGraspLabels if not given.

        - collision_labels: dict of collision labels. Call self.loadCollisionLabels if not given.

        - fric_coef_thresh: float of the friction coefficient threshold of the grasp.

        **ATTENTION**

        the LOWER the friction coefficient is, the better the grasp is.

        **Output:**

        - If format == '6d', return a GraspGroup instance.

        - If format == 'rect', return a RectGraspGroup instance.
        '''
        import numpy as np
        assert format == '6d' or format == 'rect', 'format must be "6d" or "rect"'
        if format == '6d':
            from .utils.xmlhandler import xmlReader
            from .utils.utils import get_obj_pose_list, generate_views, get_model_grasps, transform_points
            from .utils.rotation import batch_viewpoint_params_to_matrix
            # Camera pose of this annotation and the per-object poses parsed from XML.
            camera_poses = np.load(os.path.join(self.root,'scenes','scene_%04d' %(sceneId,),camera, 'camera_poses.npy'))
            camera_pose = camera_poses[annId]
            scene_reader = xmlReader(os.path.join(self.root,'scenes','scene_%04d' %(sceneId,),camera,'annotations','%04d.xml' %(annId,)))
            pose_vectors = scene_reader.getposevectorlist()
            obj_list,pose_list = get_obj_pose_list(camera_pose,pose_vectors)
            if grasp_labels is None:
                print('warning: grasp_labels are not given, calling self.loadGraspLabels to retrieve them')
                grasp_labels = self.loadGraspLabels(objIds = obj_list)
            if collision_labels is None:
                print('warning: collision_labels are not given, calling self.loadCollisionLabels to retrieve them')
                collision_labels = self.loadCollisionLabels(sceneId)
            num_views, num_angles, num_depths = 300, 12, 4
            # Template approach views shared by every sampled point, broadcast
            # to (1, num_views, num_angles, num_depths, 3).
            template_views = generate_views(num_views)
            template_views = template_views[np.newaxis, :, np.newaxis, np.newaxis, :]
            template_views = np.tile(template_views, [1, 1, num_angles, num_depths, 1])
            collision_dump = collision_labels['scene_'+str(sceneId).zfill(4)]
            grasp_group = GraspGroup()
            for i, (obj_idx, trans) in enumerate(zip(obj_list, pose_list)):
                sampled_points, offsets, fric_coefs = grasp_labels[obj_idx]
                collision = collision_dump[i]
                point_inds = np.arange(sampled_points.shape[0])
                num_points = len(point_inds)
                # Expand points/views to one entry per (point, view, angle, depth).
                target_points = sampled_points[:, np.newaxis, np.newaxis, np.newaxis, :]
                target_points = np.tile(target_points, [1, num_views, num_angles, num_depths, 1])
                views = np.tile(template_views, [num_points, 1, 1, 1, 1])
                angles = offsets[:, :, :, :, 0]
                depths = offsets[:, :, :, :, 1]
                widths = offsets[:, :, :, :, 2]
                # Keep grasps with 0 < friction <= threshold that are collision free.
                mask1 = ((fric_coefs <= fric_coef_thresh) & (fric_coefs > 0) & ~collision)
                target_points = target_points[mask1]
                # Object frame -> world frame -> this annotation's camera frame.
                target_points = transform_points(target_points, trans)
                target_points = transform_points(target_points, np.linalg.inv(camera_pose))
                views = views[mask1]
                angles = angles[mask1]
                depths = depths[mask1]
                widths = widths[mask1]
                fric_coefs = fric_coefs[mask1]
                # Rotate grasp frames through the same chain of transforms.
                Rs = batch_viewpoint_params_to_matrix(-views, angles)
                Rs = np.matmul(trans[np.newaxis, :3, :3], Rs)
                Rs = np.matmul(np.linalg.inv(camera_pose)[np.newaxis,:3,:3], Rs)
                num_grasp = widths.shape[0]
                # Score convention: higher is better, derived from friction coefficient.
                scores = (1.1 - fric_coefs).reshape(-1,1)
                widths = widths.reshape(-1,1)
                heights = GRASP_HEIGHT * np.ones((num_grasp,1))
                depths = depths.reshape(-1,1)
                rotations = Rs.reshape((-1,9))
                object_ids = obj_idx * np.ones((num_grasp,1), dtype=np.int32)
                # Column layout matches the GraspGroup array convention.
                obj_grasp_array = np.hstack([scores, widths, heights, depths, rotations, target_points, object_ids]).astype(np.float32)
                grasp_group.grasp_group_array = np.concatenate((grasp_group.grasp_group_array, obj_grasp_array))
            return grasp_group
        else:
            # 'rect'
            rect_grasps = RectGraspGroup(os.path.join(self.root,'scenes','scene_%04d' % sceneId,camera,'rect','%04d.npy' % annId))
            return rect_grasps
[docs] def loadData(self, ids=None, *extargs):
'''
**Input:**
- ids: int or list of int of the the data ids.
- extargs: extra arguments. This function can also be called with loadData(sceneId, camera, annId)
**Output:**
- if ids is int, returns a tuple of data path
- if ids is not specified or is a list, returns a tuple of data path lists
'''
if ids is None:
return (self.rgbPath, self.depthPath, self.segLabelPath, self.metaPath, self.rectLabelPath, self.sceneName, self.annId)
if len(extargs) == 0:
if isinstance(ids, int):
return (self.rgbPath[ids], self.depthPath[ids], self.segLabelPath[ids], self.metaPath[ids], self.rectLabelPath[ids], self.sceneName[ids], self.annId[ids])
else:
return ([self.rgbPath[id] for id in ids],
[self.depthPath[id] for id in ids],
[self.segLabelPath[id] for id in ids],
[self.metaPath[id] for id in ids],
[self.rectLabelPath[id] for id in ids],
[self.sceneName[id] for id in ids],
[self.annId[id] for id in ids])
if len(extargs) == 2:
sceneId = ids
camera, annId = extargs
rgbPath = os.path.join(self.root, 'scenes', 'scene_'+str(sceneId).zfill(4), camera, 'rgb', str(annId).zfill(4)+'.png')
depthPath = os.path.join(self.root, 'scenes', 'scene_'+str(sceneId).zfill(4), camera, 'depth', str(annId).zfill(4)+'.png')
segLabelPath = os.path.join(self.root, 'scenes', 'scene_'+str(sceneId).zfill(4), camera, 'label', str(annId).zfill(4)+'.png')
metaPath = os.path.join(self.root, 'scenes', 'scene_'+str(sceneId).zfill(4), camera, 'meta', str(annId).zfill(4)+'.mat')
rectLabelPath = os.path.join(self.root, 'scenes', 'scene_'+str(sceneId).zfill(4), camera, 'rect', str(annId).zfill(4)+'.npy')
scene_name = 'scene_'+str(sceneId).zfill(4)
return (rgbPath, depthPath, segLabelPath, metaPath, rectLabelPath, scene_name,annId)
[docs] def showObjGrasp(self, objIds=[], numGrasp=10, th=0.5, maxWidth=0.08, saveFolder='save_fig', show=False):
'''
**Input:**
- objIds: int of list of objects ids.
- numGrasp: how many grasps to show in the image.
- th: threshold of the coefficient of friction.
- maxWidth: float, only visualize grasps with width<=maxWidth
- saveFolder: string of the path to save the rendered image.
- show: bool of whether to show the image.
**Output:**
- No output but save the rendered image and maybe show it.
'''
from .utils.vis import visObjGrasp
objIds = objIds if _isArrayLike(objIds) else [objIds]
if len(objIds) == 0:
print('You need to specify object ids.')
return 0
if not os.path.exists(saveFolder):
os.mkdir(saveFolder)
for obj_id in objIds:
visObjGrasp(self.root, obj_id, num_grasp=numGrasp, th=th, max_width=maxWidth, save_folder=saveFolder, show=show)
[docs] def showSceneGrasp(self, sceneId, camera = 'kinect', annId = 0, format = '6d', numGrasp = 20, show_object = True, coef_fric_thresh = 0.1):
'''
**Input:**
- sceneId: int of the scene index.
- camera: string of the camera type, 'realsense' or 'kinect'.
- annId: int of the annotation index.
- format: int of the annotation type, 'rect' or '6d'.
- numGrasp: int of the displayed grasp number, grasps will be randomly sampled.
- coef_fric_thresh: float of the friction coefficient of grasps.
'''
if format == '6d':
geometries = []
sceneGrasp = self.loadGrasp(sceneId = sceneId, annId = annId, camera = camera, format = '6d', fric_coef_thresh = coef_fric_thresh)
sceneGrasp = sceneGrasp.random_sample(numGrasp = numGrasp)
scenePCD = self.loadScenePointCloud(sceneId = sceneId, camera = camera, annId = annId, align = False)
geometries.append(scenePCD)
geometries += sceneGrasp.to_open3d_geometry_list()
if show_object:
objectPCD = self.loadSceneModel(sceneId = sceneId, camera = camera, annId = annId, align = False)
geometries += objectPCD
o3d.visualization.draw_geometries(geometries)
elif format == 'rect':
bgr = self.loadBGR(sceneId = sceneId, camera = camera, annId = annId)
sceneGrasp = self.loadGrasp(sceneId = sceneId, camera = camera, annId = annId, format = 'rect', fric_coef_thresh = coef_fric_thresh)
sceneGrasp = sceneGrasp.random_sample(numGrasp = numGrasp)
img = sceneGrasp.to_opencv_image(bgr, numGrasp = numGrasp)
cv2.imshow('Rectangle Grasps',img)
cv2.waitKey(0)
cv2.destroyAllWindows()
[docs] def show6DPose(self, sceneIds, saveFolder='save_fig', show=False, perObj=False):
'''
**Input:**
- sceneIds: int or list of scene ids.
- saveFolder: string of the folder to store the image.
- show: bool of whether to show the image.
- perObj: bool, show grasps on each object
**Output:**
- No output but to save the rendered image and maybe show the result.
'''
from .utils.vis import vis6D
sceneIds = sceneIds if _isArrayLike(sceneIds) else [sceneIds]
if len(sceneIds) == 0:
print('You need specify scene ids.')
return 0
if not os.path.exists(saveFolder):
os.mkdir(saveFolder)
for scene_id in sceneIds:
scene_name = 'scene_'+str(scene_id).zfill(4)
vis6D(self.root, scene_name, 0, self.camera,
align_to_table=True, save_folder=saveFolder, show=show, per_obj=perObj)