diff --git a/baselines/agents/slam_agents.py b/baselines/agents/slam_agents.py
new file mode 100644
index 0000000000000000000000000000000000000000..741718639ffa7a35db3b8d2da3836996ff081163
--- /dev/null
+++ b/baselines/agents/slam_agents.py
@@ -0,0 +1,623 @@
+import argparse
+import numpy as np
+import torch
+import random
+import time
+import os
+import PIL
+from math import pi
+import torch.nn.functional as F
+import orbslam2
+import habitat
+from baselines.slambased.utils import (
+    generate_2dgrid,
+)
+from baselines.slambased.reprojection import (
+    homogenize_p,
+    get_distance,
+    project_tps_into_worldmap,
+    get_direction,
+    habitat_goalpos_to_mapgoal_pos,
+    planned_path2tps,
+    angle_to_pi_2_minus_pi_2,
+)
+from baselines.slambased.reprojection import (
+    angle_to_pi_2_minus_pi_2 as norm_ang,
+)
+from habitat.sims.habitat_simulator import (
+    SimulatorActions,
+    SIM_ACTION_TO_NAME,
+    SIM_NAME_TO_ACTION,
+)
+from baselines.slambased.mappers import DirectDepthMapper
+from baselines.slambased.path_planners import DifferentiableStarPlanner
+
+from baselines.config.default import cfg
+from baselines.slambased.monodepth import MonoDepthEstimator
+from habitat.config.default import get_config
+
+# Progress bar adapted from
+# https://sumit-ghosh.com/articles/python-download-progress-bar/
+import sys
+import requests
+
+
+def download(url, filename):
+    with open(filename, 'wb') as f:
+        response = requests.get(url, stream=True)
+        total = response.headers.get('content-length')
+        if total is None:
+            f.write(response.content)
+        else:
+            downloaded = 0
+            total = int(total)
+            for data in response.iter_content(
+                chunk_size=max(int(total / 1000), 1024 * 1024)
+            ):
+                downloaded += len(data)
+                f.write(data)
+                done = int(50 * downloaded / total)
+                sys.stdout.write(
+                    '\r[{}{}]'.format('█' * done, '.' * (50 - done))
+                )
+                sys.stdout.flush()
+    sys.stdout.write('\n')
+
+
+def ResizePIL2(np_img, size=256):
+    im1 = PIL.Image.fromarray(np_img)
+    return np.array(im1.resize((size, size)))
+
+
+def make_good_config_for_orbslam2(config):
+    config.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR", "DEPTH_SENSOR"]
+    config.SIMULATOR.RGB_SENSOR.WIDTH = 256
+    config.SIMULATOR.RGB_SENSOR.HEIGHT = 256
+    config.SIMULATOR.DEPTH_SENSOR.WIDTH = 256
+    config.SIMULATOR.DEPTH_SENSOR.HEIGHT = 256
+    config.BASELINE.ORBSLAM2.CAMERA_HEIGHT = (
+        config.SIMULATOR.DEPTH_SENSOR.POSITION[1]
+    )
+    config.BASELINE.ORBSLAM2.H_OBSTACLE_MIN = (
+        0.3 * config.BASELINE.ORBSLAM2.CAMERA_HEIGHT
+    )
+    config.BASELINE.ORBSLAM2.H_OBSTACLE_MAX = (
+        1.0 * config.BASELINE.ORBSLAM2.CAMERA_HEIGHT
+    )
+    config.BASELINE.ORBSLAM2.MIN_PTS_IN_OBSTACLE = (
+        config.SIMULATOR.DEPTH_SENSOR.WIDTH / 2.0
+    )
+    return
+
+
+class RandomAgent(object):
+    r"""Simplest agent, which returns random actions
+    until the goal is reached.
+    """
+
+    def __init__(self, config):
+        super(RandomAgent, self).__init__()
+        self.num_actions = config.NUM_ACTIONS
+        self.dist_threshold_to_stop = config.DIST_TO_STOP
+        self.reset()
+        return
+
+    def reset(self):
+        self.steps = 0
+        return
+
+    def update_internal_state(self, habitat_observation):
+        self.obs = habitat_observation
+        self.steps += 1
+        return
+
+    def is_goal_reached(self):
+        dist = self.obs["pointgoal"][0]
+        return dist <= self.dist_threshold_to_stop
+
+    def act(self, habitat_observation=None, random_prob=1.0):
+        self.update_internal_state(habitat_observation)
+        # Act: check if we are done; otherwise pick a random action
+        if self.is_goal_reached():
+            action = SIM_NAME_TO_ACTION[SimulatorActions.STOP.value]
+        else:
+            action = random.randint(0, self.num_actions - 1)
+        return action
+
+
+class BlindAgent(RandomAgent):
+    def __init__(self, config):
+        super(BlindAgent, self).__init__(config)
+        self.pos_th = config.DIST_TO_STOP
+        self.angle_th = config.ANGLE_TH
+        self.reset()
+        return
+
+    def decide_what_to_do(self):
+        distance_to_goal = self.obs["pointgoal"][0]
+        angle_to_goal = norm_ang(np.array(self.obs["pointgoal"][1]))
+        # Commands are action names; act() maps them via SIM_NAME_TO_ACTION
+        command = SimulatorActions.STOP.value
+        if distance_to_goal <= self.pos_th:
+            return command
+        if abs(angle_to_goal) < self.angle_th:
+            command = "move_forward"
+        else:
+            if (angle_to_goal > 0) and (angle_to_goal < pi):
+                command = "look_left"
+            elif angle_to_goal > pi:
+                command = "look_right"
+            elif (angle_to_goal < 0) and (angle_to_goal > -pi):
+                command = "look_right"
+            else:
+                command = "look_left"
+        return command
+
+    def act(self, habitat_observation=None, random_prob=0.1):
+        self.update_internal_state(habitat_observation)
+        # Act
+        if self.is_goal_reached():
+            return SIM_NAME_TO_ACTION[SimulatorActions.STOP.value]
+        command = self.decide_what_to_do()
+        random_action = random.randint(0, self.num_actions - 1)
+        act_randomly = np.random.uniform(0, 1, 1) < random_prob
+        if act_randomly:
+            action = random_action
+        else:
+            action = SIM_NAME_TO_ACTION[command]
+        return action
+
+
+class ORBSLAM2Agent(RandomAgent):
+    def __init__(self, config, device=torch.device("cuda:0")):
+        self.num_actions = config.NUM_ACTIONS
+        self.dist_threshold_to_stop = config.DIST_TO_STOP
+        self.slam_vocab_path = config.SLAM_VOCAB_PATH
+        assert os.path.isfile(self.slam_vocab_path)
+        self.slam_settings_path = config.SLAM_SETTINGS_PATH
+        assert os.path.isfile(self.slam_settings_path)
+        self.slam = orbslam2.System(
+            self.slam_vocab_path, self.slam_settings_path, orbslam2.Sensor.RGBD
+        )
+        self.slam.set_use_viewer(False)
+        self.slam.initialize()
+        self.device = device
+        self.map_size_meters = config.MAP_SIZE
+        self.map_cell_size = config.MAP_CELL_SIZE
+        self.pos_th = config.DIST_REACHED_TH
+        self.next_wp_th = config.NEXT_WAYPOINT_TH
+        self.angle_th = config.ANGLE_TH
+        self.obstacle_th = config.MIN_PTS_IN_OBSTACLE
+        self.depth_denorm = config.DEPTH_DENORM
+        self.planned_waypoints = []
+        self.mapper = DirectDepthMapper(
+            camera_height=config.CAMERA_HEIGHT,
+            near_th=config.D_OBSTACLE_MIN,
+            far_th=config.D_OBSTACLE_MAX,
+            h_min=config.H_OBSTACLE_MIN,
+            h_max=config.H_OBSTACLE_MAX,
+            map_size=config.MAP_SIZE,
+            map_cell_size=config.MAP_CELL_SIZE,
+            device=device,
+        )
+        self.planner = DifferentiableStarPlanner(
+            max_steps=config.PLANNER_MAX_STEPS,
+            preprocess=config.PREPROCESS_MAP,
+            beta=config.BETA,
+            device=device,
+        )
+        self.slam_to_world = 1.0
+        self.timestep = 0.1
+        self.timing = False
+        self.reset()
+        return
+
+    def reset(self):
+        super(ORBSLAM2Agent, self).reset()
+        self.offset_to_goal = None
+        self.tracking_is_OK = False
+        self.waypointPose6D = None
+        self.unseen_obstacle = False
+        self.action_history = []
+        self.planned_waypoints = []
+        self.map2DObstacles = self.init_map2d()
+        n, ch, height, width = self.map2DObstacles.size()
+        self.coordinatesGrid = generate_2dgrid(height, width, False).to(
+            self.device
+        )
+        self.pose6D = self.init_pose6d()
+        self.action_history = []
+        self.pose6D_history = []
+        self.position_history = []
+        self.planned2Dpath = torch.zeros((0))
+        self.slam.reset()
+        self.cur_time = 0
+        self.toDoList = []
+        self.waypoint_id = 0
+        if self.device != torch.device("cpu"):
+            torch.cuda.empty_cache()
+        return
+
+    def update_internal_state(self, habitat_observation):
+        super(ORBSLAM2Agent, self).update_internal_state(habitat_observation)
+        self.cur_time += self.timestep
+        rgb, depth = self.rgb_d_from_observation(habitat_observation)
+        t = time.time()
+        try:
+            self.slam.process_image_rgbd(rgb, depth, self.cur_time)
+            if self.timing:
+                print(time.time() - t, "ORB_SLAM2")
+            self.tracking_is_OK = str(self.slam.get_tracking_state()) == "OK"
+        except BaseException:
+            print("Warning: ORB-SLAM2 failed to process the frame")
+            self.tracking_is_OK = False
+        if not self.tracking_is_OK:
+            self.reset()
+        t = time.time()
+        self.set_offset_to_goal(habitat_observation)
+        if self.tracking_is_OK:
+            trajectory_history = np.array(self.slam.get_trajectory_points())
+            self.pose6D = homogenize_p(
+                torch.from_numpy(trajectory_history[-1])[1:]
+                .view(3, 4)
+                .to(self.device)
+            ).view(1, 4, 4)
+            self.trajectory_history = trajectory_history
+            if len(self.position_history) > 1:
+                previous_step = get_distance(
+                    self.pose6D.view(4, 4),
+                    torch.from_numpy(self.position_history[-1])
+                    .view(4, 4)
+                    .to(self.device),
+                )
+                if (
+                    SIM_ACTION_TO_NAME[self.action_history[-1]]
+                    == "move_forward"
+                ):
+                    # hardcoded threshold for "the agent did not move"
+                    self.unseen_obstacle = previous_step.item() <= 0.001
+        current_obstacles = self.mapper(
+            torch.from_numpy(depth).to(self.device).squeeze(), self.pose6D
+        ).to(self.device)
+        self.current_obstacles = current_obstacles
+        self.map2DObstacles = torch.max(
+            self.map2DObstacles, current_obstacles.unsqueeze(0).unsqueeze(0)
+        )
+        if self.timing:
+            print(time.time() - t, "Mapping")
+        return True
+
+    def init_pose6d(self):
+        return torch.eye(4).float().to(self.device)
+
+    def map_size_in_cells(self):
+        return int(self.map_size_meters / self.map_cell_size)
+
+    def init_map2d(self):
+        return (
+            torch.zeros(
+                1, 1, self.map_size_in_cells(), self.map_size_in_cells()
+            )
+            .float()
+            .to(self.device)
+        )
+
+    def get_orientation_on_map(self):
+        self.pose6D = self.pose6D.view(1, 4, 4)
+        return torch.tensor(
+            [
+                [self.pose6D[0, 0, 0], self.pose6D[0, 0, 2]],
+                [self.pose6D[0, 2, 0], self.pose6D[0, 2, 2]],
+            ]
+        )
+
+    def get_position_on_map(self, do_floor=True):
+        return project_tps_into_worldmap(
+            self.pose6D.view(1, 4, 4),
+            self.map_cell_size,
+            self.map_size_meters,
+            do_floor,
+        )
+
+    def act(self, habitat_observation, random_prob=0.1):
+        # Update internal state
+        t = time.time()
+        cc = 0
+        update_is_ok = self.update_internal_state(habitat_observation)
+        while not update_is_ok:
+            update_is_ok = self.update_internal_state(habitat_observation)
+            cc += 1
+            if cc > 2:
+                break
+        if self.timing:
+            print(time.time() - t, " s, update internal state")
+        self.position_history.append(
+            self.pose6D.detach().cpu().numpy().reshape(1, 4, 4)
+        )
+        success = self.is_goal_reached()
+        if success:
+            action = SIM_NAME_TO_ACTION[SimulatorActions.STOP.value]
+            self.action_history.append(action)
+            return action
+        # Plan action
+        t = time.time()
+        self.planned2Dpath, self.planned_waypoints = self.plan_path()
+        if self.timing:
+            print(time.time() - t, " s, Planning")
+        t = time.time()
+        # Act
+        if self.waypointPose6D is None:
+            self.waypointPose6D = self.get_valid_waypoint_pose6d()
+        if (
+            self.is_waypoint_reached(self.waypointPose6D)
+            or not self.tracking_is_OK
+        ):
+            self.waypointPose6D = self.get_valid_waypoint_pose6d()
+            self.waypoint_id += 1
+        action = self.decide_what_to_do()
+        # With probability random_prob, take a random action instead
+        random_action = random.randint(0, self.num_actions - 1)
+        what_to_do = np.random.uniform(0, 1, 1)
+        if what_to_do < random_prob:
+            action = random_action
+        if self.timing:
+            print(time.time() - t, " s, get action")
+        self.action_history.append(action)
+        return action
+
+    def is_waypoint_good(self, pose6d):
+        p_init = self.pose6D.squeeze()
+        dist_diff = get_distance(p_init, pose6d)
+        valid = dist_diff > self.next_wp_th
+        return valid.item()
+
+    def is_waypoint_reached(self, pose6d):
+        p_init = self.pose6D.squeeze()
+        dist_diff = get_distance(p_init, pose6d)
+        reached = dist_diff <= self.pos_th
+        return reached.item()
+
+    def get_waypoint_dist_dir(self):
+        angle = get_direction(
+            self.pose6D.squeeze(), self.waypointPose6D.squeeze(), 0, 0
+        )
+        dist = get_distance(
+            self.pose6D.squeeze(), self.waypointPose6D.squeeze()
+        )
+        return torch.cat(
+            [
+                dist.view(1, 1),
+                torch.sin(angle).view(1, 1),
+                torch.cos(angle).view(1, 1),
+            ],
+            dim=1,
+        )
+
+    def get_valid_waypoint_pose6d(self):
+        p_next = self.planned_waypoints[0]
+        while not self.is_waypoint_good(p_next):
+            if len(self.planned_waypoints) > 1:
+                self.planned_waypoints = self.planned_waypoints[1:]
+                p_next = self.planned_waypoints[0]
+            else:
+                p_next = self.estimatedGoalPos6D.squeeze()
+                break
+        return p_next
+
+    def set_offset_to_goal(self, observation):
+        self.offset_to_goal = (
+            torch.from_numpy(observation["pointgoal"]).float().to(self.device)
+        )
+        self.estimatedGoalPos2D = habitat_goalpos_to_mapgoal_pos(
+            self.offset_to_goal,
+            self.pose6D.squeeze(),
+            self.map_cell_size,
+            self.map_size_meters,
+        )
+        self.estimatedGoalPos6D = planned_path2tps(
+            [self.estimatedGoalPos2D],
+            self.map_cell_size,
+            self.map_size_meters,
+            1.0,
+        ).to(self.device)[0]
+        return
+
+    def rgb_d_from_observation(self, habitat_observation):
+        rgb = habitat_observation["rgb"]
+        depth = None
+        if "depth" in habitat_observation:
+            depth = self.depth_denorm * habitat_observation["depth"]
+        return rgb, depth
+
+    def prev_plan_is_not_valid(self):
+        if len(self.planned2Dpath) == 0:
+            return True
+        pp = torch.cat(self.planned2Dpath).detach().cpu().view(-1, 2)
+        binary_map = self.map2DObstacles.squeeze().detach() >= self.obstacle_th
+        obstacles_on_path = (
+            binary_map[pp[:, 0].long(), pp[:, 1].long()]
+        ).long().sum().item() > 0
+        return obstacles_on_path  # obstacles_nearby or obstacles_on_path
+
+    def rawmap2_planner_ready(self, rawmap, start_map, goal_map):
+        map1 = (rawmap / float(self.obstacle_th)) ** 2
+        map1 = (
+            torch.clamp(map1, min=0, max=1.0)
+            - start_map
+            - F.max_pool2d(goal_map, 3, stride=1, padding=1)
+        )
+        return torch.relu(map1)
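
A note on `rawmap2_planner_ready`: it turns the raw per-cell point counts into planner costs by square-normalizing against `MIN_PTS_IN_OBSTACLE` and clamping to [0, 1], then subtracting the start cell and a 3×3 patch around the goal so both stay traversable. A toy illustration of that shaping (the values here are made up, not from a real episode):

```python
# Toy illustration of the cost shaping done by rawmap2_planner_ready.
import torch
import torch.nn.functional as F

obstacle_th = 128.0                  # MIN_PTS_IN_OBSTACLE for a 256-wide depth sensor
rawmap = torch.zeros(1, 1, 5, 5)
rawmap[0, 0, 2, 2] = 200.0           # a well-observed obstacle cell

start_map = torch.zeros_like(rawmap)
goal_map = torch.zeros_like(rawmap)
start_map[0, 0, 0, 0] = 1.0
goal_map[0, 0, 4, 4] = 1.0

map1 = torch.clamp((rawmap / obstacle_th) ** 2, min=0, max=1.0)
map1 = map1 - start_map - F.max_pool2d(goal_map, 3, stride=1, padding=1)
cost = torch.relu(map1)              # 1.0 at the obstacle, 0 at start and around the goal
print(cost.squeeze())
```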
+
+    def plan_path(self, overwrite=False):
+        t = time.time()
+        if (
+            (not self.prev_plan_is_not_valid())
+            and (not overwrite)
+            and (len(self.planned_waypoints) > 0)
+        ):
+            return self.planned2Dpath, self.planned_waypoints
+        self.waypointPose6D = None
+        current_pos = self.get_position_on_map()
+        start_map = torch.zeros_like(self.map2DObstacles).to(self.device)
+        start_map[
+            0, 0, current_pos[0, 0].long(), current_pos[0, 1].long()
+        ] = 1.0
+        goal_map = torch.zeros_like(self.map2DObstacles).to(self.device)
+        goal_map[
+            0,
+            0,
+            self.estimatedGoalPos2D[0, 0].long(),
+            self.estimatedGoalPos2D[0, 1].long(),
+        ] = 1.0
+        path, cost = self.planner(
+            self.rawmap2_planner_ready(
+                self.map2DObstacles, start_map, goal_map
+            ).to(self.device),
+            self.coordinatesGrid.to(self.device),
+            goal_map.to(self.device),
+            start_map.to(self.device),
+        )
+        if len(path) == 0:
+            return path, []
+        if self.timing:
+            print(time.time() - t, " s, Planning")
+        t = time.time()
+        planned_waypoints = planned_path2tps(
+            path, self.map_cell_size, self.map_size_meters, 1.0, False
+        ).to(self.device)
+        return path, planned_waypoints
+
+    def planner_prediction_to_command(self, p_next):
+        command = "stop"
+        p_init = self.pose6D.squeeze()
+        d_angle_rot_th = self.angle_th
+        pos_th = self.pos_th
+        if get_distance(p_init, p_next) <= pos_th:
+            return command
+        d_angle = angle_to_pi_2_minus_pi_2(
+            get_direction(p_init, p_next, ang_th=d_angle_rot_th, pos_th=pos_th)
+        )
+        if abs(d_angle) < d_angle_rot_th:
+            command = "move_forward"
+        else:
+            if (d_angle > 0) and (d_angle < pi):
+                command = "look_left"
+            elif d_angle > pi:
+                command = "look_right"
+            elif (d_angle < 0) and (d_angle > -pi):
+                command = "look_right"
+            else:
+                command = "look_left"
+        return command
+
+    def decide_what_to_do(self):
+        action = None
+        if self.is_goal_reached():
+            action = SIM_NAME_TO_ACTION[SimulatorActions.STOP.value]
+            return action
+        if self.unseen_obstacle:
+            command = "look_right"
+            return SIM_NAME_TO_ACTION[command]
+        command = self.planner_prediction_to_command(self.waypointPose6D)
+        return SIM_NAME_TO_ACTION[command]
Downloading...", url) + download(url, self.checkpoint) + self.monodepth = MonoDepthEstimator(self.checkpoint) + self.reset() + return + + def rgb_d_from_observation(self, habitat_observation): + rgb = habitat_observation["rgb"] + depth = ResizePIL2(self.monodepth.compute_depth(PIL.Image.fromarray(rgb).resize((320,320))), 256)#/1.75 + depth[depth > 3.0] = 0 + depth[depth < 0.1] = 0 + return rgb, np.array(depth).astype(np.float32) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--agent-type", + default="orbslam2-rgbd", + choices=["blind", + "orbslam2-rgbd", + "orbslam2-rgb-monod"], + ) + parser.add_argument( + "--task-config", type=str, default="tasks/pointnav_rgbd.yaml" + ) + args = parser.parse_args() + + config = get_config() + agent_config = cfg() + config.defrost() + config.BASELINE = agent_config.BASELINE + make_good_config_for_orbslam2(config) + + if args.agent_type == 'blind': + agent = BlindAgent(config.BASELINE.ORBSLAM2) + elif args.agent_type == 'orbslam2-rgbd': + agent = ORBSLAM2Agent(config.BASELINE.ORBSLAM2) + elif args.agent_type == 'orbslam2-rgb-monod': + agent = ORBSLAM2MonodepthAgent(config.BASELINE.ORBSLAM2) + else: + raise ValueError(args.agent_type, 'is unknown type of agent') + benchmark = habitat.Benchmark(args.task_config) + metrics = benchmark.evaluate(agent) + for k, v in metrics.items(): + habitat.logger.info("{}: {:.3f}".format(k, v)) + + +if __name__ == "__main__": + main() + diff --git a/baselines/config/default.py b/baselines/config/default.py index fe44c93965f4a3f4f59a65b07f0e16f807348312..d100fd79bdaff663c265dc8af1bf09464351233e 100644 --- a/baselines/config/default.py +++ b/baselines/config/default.py @@ -5,8 +5,9 @@ # LICENSE file in the root directory of this source tree. import os +import numpy as np from typing import Optional - +from habitat import get_config from habitat.config import Config as CN DEFAULT_CONFIG_DIR = "configs/" @@ -17,7 +18,7 @@ DEFAULT_CONFIG_DIR = "configs/" _C = CN() _C.SEED = 100 # ----------------------------------------------------------------------------- -# BASELINES +# BASELINE # ----------------------------------------------------------------------------- _C.BASELINE = CN() # ----------------------------------------------------------------------------- @@ -27,6 +28,28 @@ _C.BASELINE.RL = CN() _C.BASELINE.RL.SUCCESS_REWARD = 10.0 _C.BASELINE.RL.SLACK_REWARD = -0.01 # ----------------------------------------------------------------------------- +# ORBSLAM2 BASELINE +# ----------------------------------------------------------------------------- +_C.BASELINE.ORBSLAM2 = CN() +_C.BASELINE.ORBSLAM2.SLAM_VOCAB_PATH = "baselines/slambased/data/ORBvoc.txt" +_C.BASELINE.ORBSLAM2.SLAM_SETTINGS_PATH = "baselines/slambased/data/mp3d3_small1k.yaml" +_C.BASELINE.ORBSLAM2.MAP_CELL_SIZE = 0.1 +_C.BASELINE.ORBSLAM2.MAP_SIZE = 40 +_C.BASELINE.ORBSLAM2.CAMERA_HEIGHT = get_config().SIMULATOR.DEPTH_SENSOR.POSITION[1] +_C.BASELINE.ORBSLAM2.BETA = 100 +_C.BASELINE.ORBSLAM2.H_OBSTACLE_MIN = 0.3 * _C.BASELINE.ORBSLAM2.CAMERA_HEIGHT +_C.BASELINE.ORBSLAM2.H_OBSTACLE_MAX = 1.0 * _C.BASELINE.ORBSLAM2.CAMERA_HEIGHT +_C.BASELINE.ORBSLAM2.D_OBSTACLE_MIN = 0.1 +_C.BASELINE.ORBSLAM2.D_OBSTACLE_MAX = 4.0 +_C.BASELINE.ORBSLAM2.PREPROCESS_MAP = True +_C.BASELINE.ORBSLAM2.MIN_PTS_IN_OBSTACLE = get_config().SIMULATOR.DEPTH_SENSOR.WIDTH/2.0 +_C.BASELINE.ORBSLAM2.ANGLE_TH = float(np.deg2rad(15)) +_C.BASELINE.ORBSLAM2.DIST_REACHED_TH = 0.15 +_C.BASELINE.ORBSLAM2.NEXT_WAYPOINT_TH = 0.5 +_C.BASELINE.ORBSLAM2.NUM_ACTIONS = 3 
diff --git a/baselines/config/default.py b/baselines/config/default.py
index fe44c93965f4a3f4f59a65b07f0e16f807348312..d100fd79bdaff663c265dc8af1bf09464351233e 100644
--- a/baselines/config/default.py
+++ b/baselines/config/default.py
@@ -5,8 +5,9 @@
 # LICENSE file in the root directory of this source tree.
 
 import os
+import numpy as np
 from typing import Optional
-
+from habitat import get_config
 from habitat.config import Config as CN
 
 DEFAULT_CONFIG_DIR = "configs/"
@@ -17,7 +18,7 @@
 _C = CN()
 _C.SEED = 100
 # -----------------------------------------------------------------------------
-# BASELINES
+# BASELINE
 # -----------------------------------------------------------------------------
 _C.BASELINE = CN()
 # -----------------------------------------------------------------------------
@@ -27,6 +28,28 @@
 _C.BASELINE.RL = CN()
 _C.BASELINE.RL.SUCCESS_REWARD = 10.0
 _C.BASELINE.RL.SLACK_REWARD = -0.01
 # -----------------------------------------------------------------------------
+# ORBSLAM2 BASELINE
+# -----------------------------------------------------------------------------
+_C.BASELINE.ORBSLAM2 = CN()
+_C.BASELINE.ORBSLAM2.SLAM_VOCAB_PATH = "baselines/slambased/data/ORBvoc.txt"
+_C.BASELINE.ORBSLAM2.SLAM_SETTINGS_PATH = "baselines/slambased/data/mp3d3_small1k.yaml"
+_C.BASELINE.ORBSLAM2.MAP_CELL_SIZE = 0.1
+_C.BASELINE.ORBSLAM2.MAP_SIZE = 40
+_C.BASELINE.ORBSLAM2.CAMERA_HEIGHT = get_config().SIMULATOR.DEPTH_SENSOR.POSITION[1]
+_C.BASELINE.ORBSLAM2.BETA = 100
+_C.BASELINE.ORBSLAM2.H_OBSTACLE_MIN = 0.3 * _C.BASELINE.ORBSLAM2.CAMERA_HEIGHT
+_C.BASELINE.ORBSLAM2.H_OBSTACLE_MAX = 1.0 * _C.BASELINE.ORBSLAM2.CAMERA_HEIGHT
+_C.BASELINE.ORBSLAM2.D_OBSTACLE_MIN = 0.1
+_C.BASELINE.ORBSLAM2.D_OBSTACLE_MAX = 4.0
+_C.BASELINE.ORBSLAM2.PREPROCESS_MAP = True
+_C.BASELINE.ORBSLAM2.MIN_PTS_IN_OBSTACLE = get_config().SIMULATOR.DEPTH_SENSOR.WIDTH / 2.0
+_C.BASELINE.ORBSLAM2.ANGLE_TH = float(np.deg2rad(15))
+_C.BASELINE.ORBSLAM2.DIST_REACHED_TH = 0.15
+_C.BASELINE.ORBSLAM2.NEXT_WAYPOINT_TH = 0.5
+_C.BASELINE.ORBSLAM2.NUM_ACTIONS = 3
+_C.BASELINE.ORBSLAM2.DIST_TO_STOP = 0.05
+_C.BASELINE.ORBSLAM2.PLANNER_MAX_STEPS = 500
+_C.BASELINE.ORBSLAM2.DEPTH_DENORM = get_config().SIMULATOR.DEPTH_SENSOR.MAX_DEPTH
 
 def cfg(
diff --git a/baselines/slambased/README.md b/baselines/slambased/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d70d3ade3bca23c07ff653b4f6f86d3e0d974cc4
--- /dev/null
+++ b/baselines/slambased/README.md
@@ -0,0 +1,39 @@
+### Handcrafted agent baseline adapted from the paper "Benchmarking Classic and Learned Navigation in Complex 3D Environments"
+
+Project website: https://sites.google.com/view/classic-vs-learned-navigation
+Paper: https://arxiv.org/abs/1901.10915
+
+If you use this code or the provided environments in your research, please cite the following:
+
+    @ARTICLE{Navigation2019,
+        author = {{Mishkin}, Dmytro and {Dosovitskiy}, Alexey and {Koltun}, Vladlen},
+        title = "{Benchmarking Classic and Learned Navigation in Complex 3D Environments}",
+        year = 2019,
+        month = Jan,
+        archivePrefix = {arXiv},
+        eprint = {1901.10915},
+    }
+
+## Dependencies:
+
+- conda
+- numpy
+- pytorch
+- ORBSLAM2
+
+## Tested with:
+
+- Ubuntu 16.04
+- python 3.6
+- pytorch 0.4, 1.0
+
+## Installation:
+
+- Install Anaconda: https://www.anaconda.com/download/#linux
+- Install the remaining dependencies via `./install_deps.sh`. It installs everything except the datasets.
+
+A simple example of working with the agents is shown in [handcrafted-agent-example.ipynb](../handcrafted-agent-example.ipynb).
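
For orientation, a minimal end-to-end sketch mirroring `main()` from `slam_agents.py` above; it assumes habitat-api is installed, the ORB-SLAM2 python bindings are built, and the pointnav task config exists at the default path:

```python
# Minimal usage sketch mirroring main() in baselines/agents/slam_agents.py.
import habitat
from habitat.config.default import get_config
from baselines.config.default import cfg
from baselines.agents.slam_agents import (
    ORBSLAM2Agent,
    make_good_config_for_orbslam2,
)

config = get_config()
config.defrost()
config.BASELINE = cfg().BASELINE
make_good_config_for_orbslam2(config)  # 256x256 RGB-D, ORB-SLAM2 thresholds

agent = ORBSLAM2Agent(config.BASELINE.ORBSLAM2)
metrics = habitat.Benchmark("tasks/pointnav_rgbd.yaml").evaluate(agent)
print(metrics)
```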
diff --git a/baselines/slambased/data/mp3d3_small1k.yaml b/baselines/slambased/data/mp3d3_small1k.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c814c080db6abd7dab6d4ab57ffe449c0b31181d
--- /dev/null
+++ b/baselines/slambased/data/mp3d3_small1k.yaml
@@ -0,0 +1,69 @@
+%YAML:1.0
+
+#--------------------------------------------------------------------------------------------
+# Camera Parameters. Adjust them!
+#--------------------------------------------------------------------------------------------
+
+# Camera calibration and distortion parameters (OpenCV)
+# For resolution 256x256, FOV 90 deg
+Camera.fx: 128.0
+Camera.fy: 128.0
+Camera.cx: 127.0
+Camera.cy: 127.0
+
+Camera.k1: 0.0
+Camera.k2: 0.0
+Camera.p1: 0.0
+Camera.p2: 0.0
+
+# Camera frames per second
+Camera.fps: 30.0
+
+# IR projector baseline times fx (approx.)
+Camera.bf: 50.0
+
+# Color order of the images (0: BGR, 1: RGB. It is ignored if images are grayscale)
+Camera.RGB: 1
+
+# Close/Far threshold. Baseline times.
+#ThDepth: 40.0
+ThDepth: 70.0
+
+# Depthmap values factor
+DepthMapFactor: 1.0
+
+#--------------------------------------------------------------------------------------------
+# ORB Parameters
+#--------------------------------------------------------------------------------------------
+
+# ORB Extractor: Number of features per image
+ORBextractor.nFeatures: 1000
+
+# ORB Extractor: Scale factor between levels in the scale pyramid
+ORBextractor.scaleFactor: 1.2
+
+# ORB Extractor: Number of levels in the scale pyramid
+ORBextractor.nLevels: 8
+
+# ORB Extractor: Fast threshold
+# The image is divided into a grid. At each cell, FAST corners are extracted imposing a minimum response.
+# First we impose iniThFAST. If no corners are detected, we impose the lower value minThFAST.
+# You can lower these values if your images have low contrast.
+ORBextractor.iniThFAST: 5
+ORBextractor.minThFAST: 1
+
+#--------------------------------------------------------------------------------------------
+# Viewer Parameters
+#--------------------------------------------------------------------------------------------
+Viewer.KeyFrameSize: 0.1
+Viewer.KeyFrameLineWidth: 1
+Viewer.GraphLineWidth: 1
+Viewer.PointSize: 2
+Viewer.CameraSize: 0.15
+Viewer.CameraLineWidth: 2
+Viewer.ViewpointX: 0
+Viewer.ViewpointY: -10
+Viewer.ViewpointZ: -0.1
+Viewer.ViewpointF: 2000
diff --git a/baselines/slambased/data/slam-based-agent.png b/baselines/slambased/data/slam-based-agent.png
new file mode 100644
index 0000000000000000000000000000000000000000..3aafc634fa034e68bb4fea5f50a72406a554db56
Binary files /dev/null and b/baselines/slambased/data/slam-based-agent.png differ
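
The intrinsics hardcoded in the yaml above follow from the pinhole model at 256×256 with a 90° FOV (and `DirectDepthMapper.forward` below recomputes the same values: `fx = W / 2`, `cx = int(fx) - 1`). A quick check of the arithmetic:

```python
# Sanity check for the hardcoded intrinsics in mp3d3_small1k.yaml:
# for a pinhole camera, fx = (W / 2) / tan(FOV / 2).
import math

width = height = 256  # sensor size set in make_good_config_for_orbslam2
fov_deg = 90.0        # "For resolution 256x256, FOV 90 deg" per the yaml comment

fx = (width / 2.0) / math.tan(math.radians(fov_deg) / 2.0)   # = 128.0
fy = (height / 2.0) / math.tan(math.radians(fov_deg) / 2.0)  # = 128.0
cx, cy = width / 2.0 - 1, height / 2.0 - 1                   # = 127.0
print(fx, fy, cx, cy)
```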
diff --git a/baselines/slambased/install_deps.sh b/baselines/slambased/install_deps.sh
new file mode 100755
index 0000000000000000000000000000000000000000..75002474f413b883da14c40423991d736ef744d8
--- /dev/null
+++ b/baselines/slambased/install_deps.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+DIR1=$(pwd)
+MAINDIR=$(pwd)/3rdparty
+mkdir ${MAINDIR}
+cd ${MAINDIR}
+#conda create -y -n "HandcraftedAgents" python=3.6
+source activate HandcraftedAgents
+conda install opencv -y
+conda install pytorch torchvision -c pytorch -y
+conda install -c conda-forge imageio -y
+conda install ffmpeg -c conda-forge -y
+cd ${MAINDIR}
+mkdir eigen3
+cd eigen3
+wget http://bitbucket.org/eigen/eigen/get/3.3.5.tar.gz
+tar -xzf 3.3.5.tar.gz
+cd eigen-eigen-b3f3d4950030
+mkdir build
+cd build
+cmake .. -DCMAKE_INSTALL_PREFIX=${MAINDIR}/eigen3_installed/
+make install
+cd ${MAINDIR}
+wget https://sourceforge.net/projects/glew/files/glew/2.1.0/glew-2.1.0.zip
+unzip glew-2.1.0.zip
+cd glew-2.1.0/
+cd build
+cmake ./cmake -DCMAKE_INSTALL_PREFIX=${MAINDIR}/glew_installed
+make -j4
+make install
+cd ${MAINDIR}
+#pip install numpy --upgrade
+rm Pangolin -rf
+git clone https://github.com/stevenlovegrove/Pangolin.git
+cd Pangolin
+mkdir build
+cd build
+cmake .. -DCMAKE_PREFIX_PATH=${MAINDIR}/glew_installed/ -DCMAKE_LIBRARY_PATH=${MAINDIR}/glew_installed/lib/ -DCMAKE_INSTALL_PREFIX=${MAINDIR}/pangolin_installed
+cmake --build .
+cd ${MAINDIR}
+rm ORB_SLAM2 -rf
+rm ORB_SLAM2-PythonBindings -rf
+git clone https://github.com/ducha-aiki/ORB_SLAM2
+git clone https://github.com/ducha-aiki/ORB_SLAM2-PythonBindings
+cd ${MAINDIR}/ORB_SLAM2
+sed -i "s,cmake .. -DCMAKE_BUILD_TYPE=Release,cmake .. -DCMAKE_BUILD_TYPE=Release -DEIGEN3_INCLUDE_DIR=${MAINDIR}/eigen3_installed/include/eigen3/ -DCMAKE_INSTALL_PREFIX=${MAINDIR}/ORBSLAM2_installed ,g" build.sh
+ln -s ${MAINDIR}/eigen3_installed/include/eigen3/Eigen ${MAINDIR}/ORB_SLAM2/Thirdparty/g2o/g2o/core/Eigen
+./build.sh
+cd build
+make install
+cd ${MAINDIR}
+cd ORB_SLAM2-PythonBindings/src
+ln -s ${MAINDIR}/eigen3_installed/include/eigen3/Eigen Eigen
+cd ${MAINDIR}/ORB_SLAM2-PythonBindings
+mkdir build
+cd build
+CONDA_DIR=$(dirname $(dirname $(which conda)))
+sed -i "s,lib/python3.5/dist-packages,${CONDA_DIR}/envs/HandcraftedAgents/lib/python3.6/site-packages/,g" ../CMakeLists.txt
+# Note: CMAKE_INCLUDE_PATH is quoted so the ";" separator is not parsed by the shell
+cmake .. -DPYTHON_INCLUDE_DIR=$(python -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") -DPYTHON_LIBRARY=$(python -c "import distutils.sysconfig as sysconfig; print(sysconfig.get_config_var('LIBDIR'))")/libpython3.6m.so -DPYTHON_EXECUTABLE:FILEPATH=`which python` -DCMAKE_LIBRARY_PATH=${MAINDIR}/ORBSLAM2_installed/lib -DCMAKE_INCLUDE_PATH="${MAINDIR}/ORBSLAM2_installed/include;${MAINDIR}/eigen3_installed/include/eigen3" -DCMAKE_INSTALL_PREFIX=${MAINDIR}/pyorbslam2_installed
+make
+make install
+cp ${MAINDIR}/ORB_SLAM2/Vocabulary/ORBvoc.txt ${DIR1}/data/
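
Once the script finishes, a minimal smoke test for the resulting bindings (a sketch, run from the repo root inside the `HandcraftedAgents` environment; it assumes the vocabulary was copied to `data/` as in the last line above):

```python
# Quick smoke test for the ORB-SLAM2 python bindings built by install_deps.sh.
import os
import orbslam2

vocab = "baselines/slambased/data/ORBvoc.txt"
settings = "baselines/slambased/data/mp3d3_small1k.yaml"
assert os.path.isfile(vocab) and os.path.isfile(settings)

slam = orbslam2.System(vocab, settings, orbslam2.Sensor.RGBD)
slam.set_use_viewer(False)
slam.initialize()  # loading the vocabulary takes a few seconds
slam.shutdown()
```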
diff --git a/baselines/slambased/mappers.py b/baselines/slambased/mappers.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e873ba685af6daaf9107ea22b10f48bac94eb42
--- /dev/null
+++ b/baselines/slambased/mappers.py
@@ -0,0 +1,122 @@
+import numpy as np
+import torch
+import torch.nn as nn
+from baselines.slambased.reprojection import (
+    get_map_size_in_cells,
+    project2d_pcl_into_worldmap,
+    reproject_local_to_global,
+)
+
+
+def depth2local3d(depth, fx, fy, cx, cy):
+    """Projects a depth map to a 3d point cloud
+    with the origin in the camera focus
+    """
+    device = depth.device
+    h, w = depth.squeeze().size()
+    x = torch.linspace(0, w - 1, w).to(device)
+    y = torch.linspace(0, h - 1, h).to(device)
+    xv, yv = torch.meshgrid([x, y])
+    dfl = depth.t().flatten()
+    return torch.cat(
+        [
+            (dfl * (xv.flatten() - cx) / fx).unsqueeze(-1),  # x
+            (dfl * (yv.flatten() - cy) / fy).unsqueeze(-1),  # y
+            dfl.unsqueeze(-1),  # z
+        ],
+        dim=1,
+    )
+
+
+def pcl_to_obstacles(pts3d, map_size=40, cell_size=0.2, min_pts=10):
+    """Counts the number of 3d points in each 2d map cell.
+    Height is sum-pooled.
+    """
+    device = pts3d.device
+    map_size_in_cells = get_map_size_in_cells(map_size, cell_size) - 1
+    init_map = torch.zeros(
+        (map_size_in_cells, map_size_in_cells), device=device
+    )
+    if len(pts3d) <= 1:
+        return init_map
+    num_pts, dim = pts3d.size()
+    pts2d = torch.cat([pts3d[:, 2:3], pts3d[:, 0:1]], dim=1)
+    data_idxs = torch.round(
+        project2d_pcl_into_worldmap(pts2d, map_size, cell_size)
+    )
+    if len(data_idxs) > min_pts:
+        u, counts = np.unique(
+            data_idxs.detach().cpu().numpy(), axis=0, return_counts=True
+        )
+        init_map[u[:, 0], u[:, 1]] = torch.from_numpy(counts).to(
+            dtype=torch.float32, device=device
+        )
+    return init_map
+
+
+class DirectDepthMapper(nn.Module):
+    """Estimates an obstacle map given the depth image
+    ToDo: replace numpy histogram counting with a differentiable
+    pytorch soft count like in
+    https://papers.nips.cc/paper/7545-unsupervised-learning-of-shape-and-pose-with-differentiable-point-clouds.pdf
+    """
+
+    def __init__(
+        self,
+        camera_height=0,
+        near_th=0.1,
+        far_th=4.0,
+        h_min=0.0,
+        h_max=1.0,
+        map_size=40,
+        map_cell_size=0.1,
+        device=torch.device("cpu"),
+        **kwargs
+    ):
+        super(DirectDepthMapper, self).__init__()
+        self.device = device
+        self.near_th = near_th
+        self.far_th = far_th
+        self.h_min_th = h_min
+        self.h_max_th = h_max
+        self.camera_height = camera_height
+        self.map_size_meters = map_size
+        self.map_cell_size = map_cell_size
+        return
+
+    def forward(self, depth, pose=torch.eye(4).float()):
+        self.device = depth.device
+        # Works for FOV = 90 degrees
+        # Should be adjusted if the FOV changes
+        self.fx = float(depth.size(1)) / 2.0
+        self.fy = float(depth.size(0)) / 2.0
+        self.cx = int(self.fx) - 1
+        self.cy = int(self.fy) - 1
+        pose = pose.to(self.device)
+        local_3d_pcl = depth2local3d(depth, self.fx, self.fy, self.cx, self.cy)
+        idxs = (torch.abs(local_3d_pcl[:, 2]) < self.far_th) * (
+            torch.abs(local_3d_pcl[:, 2]) >= self.near_th
+        )
+        survived_points = local_3d_pcl[idxs]
+        if len(survived_points) < 20:
+            map_size_in_cells = (
+                get_map_size_in_cells(self.map_size_meters, self.map_cell_size)
+                - 1
+            )
+            init_map = torch.zeros(
+                (map_size_in_cells, map_size_in_cells), device=self.device
+            )
+            return init_map
+        global_3d_pcl = reproject_local_to_global(survived_points, pose)[:, :3]
+        # Because originally y looks down and from the agent camera height
+        global_3d_pcl[:, 1] = -global_3d_pcl[:, 1] + self.camera_height
+        idxs = (global_3d_pcl[:, 1] > self.h_min_th) * (
+            global_3d_pcl[:, 1] < self.h_max_th
+        )
+        global_3d_pcl = global_3d_pcl[idxs]
+        obstacle_map = pcl_to_obstacles(
+            global_3d_pcl, self.map_size_meters, self.map_cell_size
+        )
+        return obstacle_map
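
A toy usage sketch of the mapper with synthetic inputs; the parameter values mirror the ORBSLAM2 defaults from `baselines/config/default.py` (camera height 1.25 m, i.e. Habitat's default `DEPTH_SENSOR.POSITION[1]`, is an assumption here):

```python
# Toy usage sketch of DirectDepthMapper with synthetic inputs (CPU).
import torch
from baselines.slambased.mappers import DirectDepthMapper

mapper = DirectDepthMapper(
    camera_height=1.25,                # SIMULATOR.DEPTH_SENSOR.POSITION[1]
    near_th=0.1, far_th=4.0,           # D_OBSTACLE_MIN / D_OBSTACLE_MAX
    h_min=0.375, h_max=1.25,           # 0.3 and 1.0 times the camera height
    map_size=40, map_cell_size=0.1,
)

depth = 2.0 * torch.ones(256, 256)     # fake depth image, metres
pose = torch.eye(4)                    # camera at the map origin
obstacle_map = mapper(depth, pose)     # (cells, cells) point counts per cell
print(obstacle_map.shape, obstacle_map.sum())
```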
diff --git a/baselines/slambased/monodepth.py b/baselines/slambased/monodepth.py
new file mode 100644
index 0000000000000000000000000000000000000000..d047a61703586ec1779b84a5af9a1cd304e8a386
--- /dev/null
+++ b/baselines/slambased/monodepth.py
@@ -0,0 +1,573 @@
+"""
+The code below is taken from https://github.com/JunjH/Revisiting_Single_Depth_Estimation
+Revisiting Single Image Depth Estimation: Toward Higher Resolution Maps With Accurate Object Boundaries
+Junjie Hu and Mete Ozay and Yan Zhang and Takayuki Okatani
+WACV 2019
+"""
+
+import torch
+import torch.nn.parallel
+
+"""
+ResNet code gently borrowed from
+https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
+"""
+
+import torch.nn as nn
+import math
+import torch.utils.model_zoo as model_zoo
+import torch.nn.functional as F
+import numpy as np
+import os
+from PIL import Image
+accimage = None
+from torchvision import transforms, utils
+
+
+__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
+           'resnet152']
+
+
+model_urls = {
+    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
+    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
+    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
+    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
+    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
+}
+
+
+def conv3x3(in_planes, out_planes, stride=1):
+    "3x3 convolution with padding"
+    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+                     padding=1, bias=False)
+
+
+class BasicBlock(nn.Module):
+    expansion = 1
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None):
+        super(BasicBlock, self).__init__()
+        self.conv1 = conv3x3(inplanes, planes, stride)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.relu = nn.ReLU(inplace=True)
+        self.conv2 = conv3x3(planes, planes)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x):
+        residual = x
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+
+        if self.downsample is not None:
+            residual = self.downsample(x)
+
+        out += residual
+        out = self.relu(out)
+
+        return out
+
+
+class Bottleneck(nn.Module):
+    expansion = 4
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None):
+        super(Bottleneck, self).__init__()
+        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
+                               padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(planes * 4)
+        self.relu = nn.ReLU(inplace=True)
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x):
+        residual = x
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+        out = self.relu(out)
+
+        out = self.conv3(out)
+        out = self.bn3(out)
+
+        if self.downsample is not None:
+            residual = self.downsample(x)
+
+        out += residual
+        out = self.relu(out)
+
+        return out
+
+
+class ResNet(nn.Module):
+
+    def __init__(self, block, layers, num_classes=1000):
+        self.inplanes = 64
+        super(ResNet, self).__init__()
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
+                               bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        self.layer1 = self._make_layer(block, 64, layers[0])
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
+        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
+        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
+        self.avgpool = nn.AvgPool2d(7, stride=1)
+        self.fc = nn.Linear(512 * block.expansion, num_classes)
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                m.weight.data.normal_(0, math.sqrt(2. / n))
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+
+    def _make_layer(self, block, planes, blocks, stride=1):
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(self.inplanes, planes * block.expansion,
+                          kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(planes * block.expansion),
+            )
+
+        layers = []
+        layers.append(block(self.inplanes, planes, stride, downsample))
+        self.inplanes = planes * block.expansion
+        for i in range(1, blocks):
+            layers.append(block(self.inplanes, planes))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+
+        x = self.avgpool(x)
+        x = x.view(x.size(0), -1)
+        x = self.fc(x)
+
+        return x
+
+
+def resnet18(pretrained=False, **kwargs):
+    """Constructs a ResNet-18 model.
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
+    if pretrained:
+        model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
+    return model
+
+
+def resnet34(pretrained=False, **kwargs):
+    """Constructs a ResNet-34 model.
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
+    if pretrained:
+        model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
+    return model
+
+
+def resnet50(pretrained=False, **kwargs):
+    """Constructs a ResNet-50 model.
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
+    if pretrained:
+        model.load_state_dict(
+            model_zoo.load_url(model_urls['resnet50'],
+                               'pretrained_model/encoder')
+        )
+    return model
+
+
+def resnet101(pretrained=False, **kwargs):
+    """Constructs a ResNet-101 model.
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
+    if pretrained:
+        model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
+    return model
+
+
+def resnet152(pretrained=False, **kwargs):
+    """Constructs a ResNet-152 model.
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
+    if pretrained:
+        model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
+    return model
+
+
+class model(nn.Module):
+    def __init__(self, Encoder, num_features, block_channel):
+        super(model, self).__init__()
+
+        self.E = Encoder
+        self.D = D(num_features)
+        self.MFF = MFF(block_channel)
+        self.R = R(block_channel)
+
+    def forward(self, x):
+        x_block1, x_block2, x_block3, x_block4 = self.E(x)
+        x_decoder = self.D(x_block1, x_block2, x_block3, x_block4)
+        x_mff = self.MFF(
+            x_block1,
+            x_block2,
+            x_block3,
+            x_block4,
+            [x_decoder.size(2), x_decoder.size(3)],
+        )
+        out = self.R(torch.cat((x_decoder, x_mff), 1))
+
+        return out
+
+
+class _UpProjection(nn.Sequential):
+
+    def __init__(self, num_input_features, num_output_features):
+        super(_UpProjection, self).__init__()
+
+        self.conv1 = nn.Conv2d(num_input_features, num_output_features,
+                               kernel_size=5, stride=1, padding=2, bias=False)
+        self.bn1 = nn.BatchNorm2d(num_output_features)
+        self.relu = nn.ReLU(inplace=True)
+        self.conv1_2 = nn.Conv2d(num_output_features, num_output_features,
+                                 kernel_size=3, stride=1, padding=1,
+                                 bias=False)
+        self.bn1_2 = nn.BatchNorm2d(num_output_features)
+
+        self.conv2 = nn.Conv2d(num_input_features, num_output_features,
+                               kernel_size=5, stride=1, padding=2, bias=False)
+        self.bn2 = nn.BatchNorm2d(num_output_features)
+
+    def forward(self, x, size):
+        x = F.upsample(x, size=size, mode='bilinear')
+        x_conv1 = self.relu(self.bn1(self.conv1(x)))
+        bran1 = self.bn1_2(self.conv1_2(x_conv1))
+        bran2 = self.bn2(self.conv2(x))
+
+        out = self.relu(bran1 + bran2)
+
+        return out
+
+
+class E_resnet(nn.Module):
+
+    def __init__(self, original_model, num_features=2048):
+        super(E_resnet, self).__init__()
+        self.conv1 = original_model.conv1
+        self.bn1 = original_model.bn1
+        self.relu = original_model.relu
+        self.maxpool = original_model.maxpool
+
+        self.layer1 = original_model.layer1
+        self.layer2 = original_model.layer2
+        self.layer3 = original_model.layer3
+        self.layer4 = original_model.layer4
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x_block1 = self.layer1(x)
+        x_block2 = self.layer2(x_block1)
+        x_block3 = self.layer3(x_block2)
+        x_block4 = self.layer4(x_block3)
+
+        return x_block1, x_block2, x_block3, x_block4
+
+
+class D(nn.Module):
+
+    def __init__(self, num_features=2048):
+        super(D, self).__init__()
+        self.conv = nn.Conv2d(num_features, num_features // 2,
+                              kernel_size=1, stride=1, bias=False)
+        num_features = num_features // 2
+        self.bn = nn.BatchNorm2d(num_features)
+
+        self.up1 = _UpProjection(
+            num_input_features=num_features,
+            num_output_features=num_features // 2)
+        num_features = num_features // 2
+
+        self.up2 = _UpProjection(
+            num_input_features=num_features,
+            num_output_features=num_features // 2)
+        num_features = num_features // 2
+
+        self.up3 = _UpProjection(
+            num_input_features=num_features,
+            num_output_features=num_features // 2)
+        num_features = num_features // 2
+
+        self.up4 = _UpProjection(
+            num_input_features=num_features,
+            num_output_features=num_features // 2)
+        num_features = num_features // 2
+
+    def forward(self, x_block1, x_block2, x_block3, x_block4):
+        x_d0 = F.relu(self.bn(self.conv(x_block4)))
+        x_d1 = self.up1(x_d0, [x_block3.size(2), x_block3.size(3)])
+        x_d2 = self.up2(x_d1, [x_block2.size(2), x_block2.size(3)])
+        x_d3 = self.up3(x_d2, [x_block1.size(2), x_block1.size(3)])
+        x_d4 = self.up4(x_d3, [x_block1.size(2) * 2, x_block1.size(3) * 2])
+
+        return x_d4
+
+
+class MFF(nn.Module):
+
+    def __init__(self, block_channel, num_features=64):
+        super(MFF, self).__init__()
+
+        self.up1 = _UpProjection(
+            num_input_features=block_channel[0], num_output_features=16)
+
+        self.up2 = _UpProjection(
+            num_input_features=block_channel[1], num_output_features=16)
+
+        self.up3 = _UpProjection(
+            num_input_features=block_channel[2], num_output_features=16)
+
+        self.up4 = _UpProjection(
+            num_input_features=block_channel[3], num_output_features=16)
+
+        self.conv = nn.Conv2d(
+            num_features, num_features, kernel_size=5, stride=1, padding=2,
+            bias=False)
+        self.bn = nn.BatchNorm2d(num_features)
+
+    def forward(self, x_block1, x_block2, x_block3, x_block4, size):
+        x_m1 = self.up1(x_block1, size)
+        x_m2 = self.up2(x_block2, size)
+        x_m3 = self.up3(x_block3, size)
+        x_m4 = self.up4(x_block4, size)
+
+        x = self.bn(self.conv(torch.cat((x_m1, x_m2, x_m3, x_m4), 1)))
+        x = F.relu(x)
+
+        return x
+
+
+class R(nn.Module):
+    def __init__(self, block_channel):
+        super(R, self).__init__()
+
+        num_features = 64 + block_channel[3] // 32
+        self.conv0 = nn.Conv2d(num_features, num_features,
+                               kernel_size=5, stride=1, padding=2, bias=False)
+        self.bn0 = nn.BatchNorm2d(num_features)
+
+        self.conv1 = nn.Conv2d(num_features, num_features,
+                               kernel_size=5, stride=1, padding=2, bias=False)
+        self.bn1 = nn.BatchNorm2d(num_features)
+
+        self.conv2 = nn.Conv2d(
+            num_features, 1, kernel_size=5, stride=1, padding=2, bias=True)
+
+    def forward(self, x):
+        x0 = self.conv0(x)
+        x0 = self.bn0(x0)
+        x0 = F.relu(x0)
+
+        x1 = self.conv1(x0)
+        x1 = self.bn1(x1)
+        x1 = F.relu(x1)
+
+        x2 = self.conv2(x1)
+
+        return x2
+
+
+def _is_pil_image(img):
+    return isinstance(img, Image.Image)
+
+
+def _is_numpy_image(img):
+    return isinstance(img, np.ndarray) and (img.ndim in {2, 3})
+
+
+class Scale(object):
+    def __init__(self, size):
+        self.size = size
+
+    def __call__(self, image):
+        image = self.changeScale(image, self.size)
+        return image
+
+    def changeScale(self, img, size, interpolation=Image.BILINEAR):
+        ow, oh = size
+        return img.resize((ow, oh), interpolation)
+
+
+class CenterCrop(object):
+    def __init__(self, size):
+        self.size = size
+
+    def __call__(self, image):
+        image = self.centerCrop(image, self.size)
+        return image
+
+    def centerCrop(self, image, size):
+        w1, h1 = image.size
+        tw, th = size
+
+        if w1 == tw and h1 == th:
+            return image
+
+        x1 = int(round((w1 - tw) / 2.))
+        y1 = int(round((h1 - th) / 2.))
+
+        image = image.crop((x1, y1, tw + x1, th + y1))
+        return image
+
+
+class ToTensor(object):
+    """Convert a ``PIL.Image`` or ``numpy.ndarray`` to tensor.
+    Converts a PIL.Image or numpy.ndarray (H x W x C) in the range
+    [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range
+    [0.0, 1.0].
+    """
+
+    def __call__(self, image):
+        image = self.to_tensor(image)
+        return image
+
+    def to_tensor(self, pic):
+        if not (_is_pil_image(pic) or _is_numpy_image(pic)):
+            raise TypeError(
+                'pic should be PIL Image or ndarray. Got {}'.format(type(pic))
+            )
+
+        if isinstance(pic, np.ndarray):
+            img = torch.from_numpy(pic.transpose((2, 0, 1)))
+            return img.float().div(255)
+
+        if accimage is not None and isinstance(pic, accimage.Image):
+            nppic = np.zeros(
+                [pic.channels, pic.height, pic.width], dtype=np.float32
+            )
+            pic.copyto(nppic)
+            return torch.from_numpy(nppic)
+
+        # handle PIL Image
+        if pic.mode == 'I':
+            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
+        elif pic.mode == 'I;16':
+            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
+        else:
+            img = torch.ByteTensor(
+                torch.ByteStorage.from_buffer(pic.tobytes())
+            )
+        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
+        if pic.mode == 'YCbCr':
+            nchannel = 3
+        elif pic.mode == 'I;16':
+            nchannel = 1
+        else:
+            nchannel = len(pic.mode)
+        img = img.view(pic.size[1], pic.size[0], nchannel)
+        # put it from HWC to CHW format
+        # yikes, this transpose takes 80% of the loading time/CPU
+        img = img.transpose(0, 1).transpose(0, 2).contiguous()
+        if isinstance(img, torch.ByteTensor):
+            return img.float().div(255)
+        else:
+            return img
+
+
+class Normalize(object):
+    def __init__(self, mean, std):
+        self.mean = mean
+        self.std = std
+
+    def __call__(self, image):
+        image = self.normalize(image, self.mean, self.std)
+        return image
+
+    def normalize(self, tensor, mean, std):
+        for t, m, s in zip(tensor, mean, std):
+            t.sub_(m).div_(s)
+        return tensor
+
+
+def define_model(is_resnet, is_densenet, is_senet):
+    if is_resnet:
+        original_model = resnet50(pretrained=False)
+        Encoder = E_resnet(original_model)
+        model1 = model(
+            Encoder, num_features=2048, block_channel=[256, 512, 1024, 2048]
+        )
+    if is_densenet:
+        # Note: densenet161/E_densenet and senet154/E_senet are not defined in
+        # this file; these branches are kept from the original repo and are
+        # unreachable with the arguments used by MonoDepthEstimator below.
+        original_model = densenet161(pretrained=False)
+        Encoder = E_densenet(original_model)
+        model1 = model(
+            Encoder, num_features=2208, block_channel=[192, 384, 1056, 2208]
+        )
+    if is_senet:
+        original_model = senet154(pretrained=False)
+        Encoder = E_senet(original_model)
+        model1 = model(
+            Encoder, num_features=2048, block_channel=[256, 512, 1024, 2048]
+        )
+    return model1
+
+
+class MonoDepthEstimator:
+    def __init__(self, checkpoint='./pretrained_model/model_resnet'):
+        self.model = define_model(
+            is_resnet=True, is_densenet=False, is_senet=False
+        )
+        self.model = torch.nn.DataParallel(self.model).cuda()
+        cpt = torch.load(checkpoint)
+        if 'state_dict' in cpt.keys():
+            cpt = cpt['state_dict']
+        self.model.load_state_dict(cpt)
+        self.model.eval()
+        self.init_preprocessor()
+
+    def init_preprocessor(self):
+        __imagenet_stats = {'mean': [0.485, 0.456, 0.406],
+                            'std': [0.229, 0.224, 0.225]}
+
+        self.transform = transforms.Compose([
+            Scale([320, 240]),
+            #CenterCrop([304, 228]),
+            ToTensor(),
+            Normalize(__imagenet_stats['mean'],
+                      __imagenet_stats['std'])
+        ])
+
+    def preprocess(self, image):
+        image_torch = self.transform(image).unsqueeze(0)
+        return image_torch.cuda()
+
+    def compute_depth(self, image):
+        # Input: image is a PIL image
+        # Output: depth is a numpy array
+        image_torch = self.preprocess(image)
+        depth_torch = self.model(image_torch)
+        depth = (
+            depth_torch.view(depth_torch.size(2), depth_torch.size(3))
+            .data.cpu()
+            .numpy()
+        )
+        return depth
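
A minimal sketch of using `MonoDepthEstimator` the way `ORBSLAM2MonodepthAgent.rgb_d_from_observation` does. It requires a CUDA device (the model is wrapped in `DataParallel(...).cuda()`), and the blank image stands in for a real observation:

```python
# Sketch: monocular depth for the RGB-only agent, mirroring
# ORBSLAM2MonodepthAgent.rgb_d_from_observation.
import numpy as np
import PIL.Image
from baselines.agents.slam_agents import ResizePIL2
from baselines.slambased.monodepth import MonoDepthEstimator

estimator = MonoDepthEstimator("baselines/slambased/data/mp3d_resnet50.pth")
rgb = np.zeros((256, 256, 3), dtype=np.uint8)      # stand-in for obs["rgb"]
depth = estimator.compute_depth(
    PIL.Image.fromarray(rgb).resize((320, 320))
)                                                  # numpy (H, W), metres
depth = ResizePIL2(depth, 256)                     # back to the sensor size
depth[(depth < 0.1) | (depth > 3.0)] = 0           # trust only 0.1-3 m
```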
diff --git a/baselines/slambased/path_planners.py b/baselines/slambased/path_planners.py
new file mode 100644
index 0000000000000000000000000000000000000000..adc2eb97bebf5db3c2ad971d86295f1c08179579
--- /dev/null
+++ b/baselines/slambased/path_planners.py
@@ -0,0 +1,512 @@
+import numpy as np
+import torch
+import torch.nn.functional as F
+import torch.nn as nn
+import matplotlib.pyplot as plt
+from baselines.slambased.utils import generate_2dgrid
+
+
+def safe_roi_2d(array2d, ymin, ymax, xmin, xmax):
+    (h, w) = array2d.shape
+    return max(0, ymin), min(ymax, h), max(0, xmin), min(xmax, w)
+
+
+def f2ind(ten, i):
+    # Float to index
+    return torch.round(ten[i]).long()
+
+
+def init_neights_to_channels(ks=3):
+    r"""Convolutional kernel
+    which maps a neighborhood into channels
+    """
+    weights = np.zeros((ks * ks, 1, ks, ks), dtype=np.float32)
+    for y in range(ks):
+        for x in range(ks):
+            weights[x * ks + y, 0, y, x] = 1.0
+    return weights
+
+
+class SoftArgMin(nn.Module):
+    def __init__(self, beta=5):
+        super(SoftArgMin, self).__init__()
+        self.beta = beta
+        return
+
+    def forward(self, x, coords2d=None):
+        bx_sm = F.softmax(self.beta * (-x).view(1, -1), dim=1)
+        if coords2d is None:
+            coords2d = generate_2dgrid(x.size(2), x.size(3), False)
+        coords2d_flat = coords2d.view(2, -1)
+        return (bx_sm.expand_as(coords2d_flat) * coords2d_flat).sum(
+            dim=1
+        ) / bx_sm.sum(dim=1)
+
+
+class HardArgMin(nn.Module):
+    def __init__(self):
+        super(HardArgMin, self).__init__()
+        return
+
+    def forward(self, x, coords2d=None):
+        val, idx = x.view(-1).min(dim=0)
+        if coords2d is None:
+            coords2d = generate_2dgrid(x.size(2), x.size(3), False)
+        coords2d_flat = coords2d.view(2, -1)
+        return coords2d_flat[:, idx].view(2)
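
`init_neights_to_channels` builds a fixed 1-to-9-channel convolution that unfolds every 3×3 neighborhood into the channel dimension, so a `min(dim=1)` in the planner's `forward` below relaxes each cell against its cheapest neighbor in one shot. A quick check:

```python
# init_neights_to_channels unfolds each 3x3 neighborhood into 9 channels,
# so min(dim=1) gives, per cell, the cheapest neighbor value.
import torch
import torch.nn as nn
from baselines.slambased.path_planners import init_neights_to_channels

conv = nn.Conv2d(1, 9, kernel_size=3, bias=False)
conv.weight.data = torch.from_numpy(init_neights_to_channels(3))

g = torch.arange(25, dtype=torch.float32).view(1, 1, 5, 5)
neighbors = conv(g)                 # (1, 9, 3, 3), one channel per offset
cheapest = neighbors.min(dim=1)[0]  # per-cell min over the 3x3 window
print(cheapest)                     # top-left neighbor values: 0., 1., ...
```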
+
+
+class DifferentiableStarPlanner(nn.Module):
+    def __init__(
+        self,
+        max_steps=500,
+        visualize=False,
+        preprocess=False,
+        beta=100,
+        connectivity="eight",
+        device=torch.device("cpu"),
+        **kwargs
+    ):
+        super(DifferentiableStarPlanner, self).__init__()
+        self.eps = 1e-12
+        self.max_steps = max_steps
+        self.visualize = visualize
+        self.inf = 1e7
+        self.ob_cost = 10000.0
+        self.device = device
+        self.beta = beta
+        self.preprocess = preprocess
+        # self.argmin = SoftArgMin(beta)
+        self.argmin = HardArgMin()
+        self.neights2channels = nn.Conv2d(1, 9, kernel_size=(3, 3), bias=False)
+        self.neights2channels.weight.data = torch.from_numpy(
+            init_neights_to_channels(3)
+        )
+        self.neights2channels.to(device)
+        self.preprocessNet = nn.Conv2d(
+            1, 1, kernel_size=(3, 3), padding=1, bias=False
+        )
+        self.preprocessNet.weight.data = torch.from_numpy(
+            np.array(
+                [
+                    [
+                        [
+                            [0.00001, 0.0001, 0.00001],
+                            [0.0001, 1, 0.0001],
+                            [0.00001, 0.0001, 0.00001],
+                        ]
+                    ]
+                ],
+                dtype=np.float32,
+            )
+        )
+        self.preprocessNet.to(device)
+        if connectivity == "eight":
+            self.gx_to_right = nn.Conv2d(1, 1, kernel_size=(1, 3), bias=False)
+            self.gx_to_right.weight.data = torch.from_numpy(
+                np.array([[[[0, 1, -1]]]], dtype=np.float32)
+            )
+            self.gx_to_right.to(device)
+
+            self.gx_to_left = nn.Conv2d(1, 1, kernel_size=(1, 3), bias=False)
+            self.gx_to_left.weight.data = torch.from_numpy(
+                np.array([[[[-1, 1, 0]]]], dtype=np.float32)
+            )
+            self.gx_to_left.to(device)
+
+            self.gy_to_up = nn.Conv2d(1, 1, kernel_size=(3, 1), bias=False)
+            self.gy_to_up.weight.data = torch.from_numpy(
+                np.array([[[[0], [1], [-1]]]], dtype=np.float32)
+            )
+            self.gy_to_up.to(device)
+
+            self.gy_to_down = nn.Conv2d(1, 1, kernel_size=(3, 1), bias=False)
+            self.gy_to_down.weight.data = torch.from_numpy(
+                np.array([[[[-1], [1], [0]]]], dtype=np.float32)
+            )
+            self.gy_to_down.to(device)
+        else:
+            raise ValueError('Only "eight" connectivity now supported')
+        return
+
+    def preprocess_obstacle_map(self, obstacle_map):
+        if self.preprocess:
+            return self.preprocessNet(obstacle_map)
+        return obstacle_map
+
+    def coords2grid(self, node_coords, h, w):
+        grid = node_coords.squeeze() - torch.FloatTensor(
+            (h / 2.0, w / 2.0)
+        ).to(self.device)
+        grid = grid / torch.FloatTensor((h / 2.0, w / 2.0)).to(self.device)
+        return grid.view(1, 1, 1, 2).flip(3)
+
+    def init_closelistmap(self):
+        return torch.zeros_like(self.start_map).float()
+
+    def init_openlistmap(self):
+        return self.start_map.clone()
+
+    def init_g_map(self):
+        return torch.clamp(
+            self.inf
+            * (torch.ones_like(self.start_map) - self.start_map.clone()),
+            min=0,
+            max=self.inf,
+        )
+
+    def safe_roi_2d(self, ymin, ymax, xmin, xmax):
+        return (
+            int(max(0, torch.round(ymin).item())),
+            int(min(torch.round(ymax).item(), self.height)),
+            int(max(0, torch.round(xmin).item())),
+            int(min(torch.round(xmax).item(), self.width)),
+        )
+
+    def forward(
+        self,
+        obstacles,
+        coords,
+        start_map,
+        goal_map,
+        non_obstacle_cost_map=None,
+        additional_steps=50,
+        return_path=True,
+    ):
+        self.trav_init_time = 0
+        self.trav_mask_time = 0
+        self.trav_soft_time = 0
+        self.conv_time = 0
+        self.close_time = 0
+
+        self.obstacles = self.preprocess_obstacle_map(obstacles.to(self.device))
+        self.start_map = start_map.to(self.device)
+        self.been_there = torch.zeros_like(self.start_map).to(
+            torch.device("cpu")
+        )
+        self.coords = coords.to(self.device)
+        self.goal_map = goal_map.to(self.device)
+        self.been_there = torch.zeros_like(self.goal_map).to(self.device)
+        self.height = obstacles.size(2)
+        self.width = obstacles.size(3)
+        m, goal_idx = torch.max(self.goal_map.view(-1), 0)
+        c_map = self.calculate_local_path_costs(non_obstacle_cost_map)
+        # c_map may not persist across map updates
+        self.g_map = self.init_g_map()
+        self.close_list_map = self.init_closelistmap()
+        self.open_list_map = self.init_openlistmap()
+        not_done = False
+        step = 0
+        stopped_by_max_iter = False
+        if self.visualize:
+            self.fig, self.ax = plt.subplots(1, 1)
+            self.image = self.ax.imshow(
+                self.g_map.squeeze().cpu().detach().numpy().astype(np.float32),
+                animated=True,
+            )
+            self.fig.canvas.draw()
+        not_done = (self.close_list_map.view(-1)[goal_idx].item() < 1.0) or (
+            self.g_map.view(-1)[goal_idx].item() >= 0.9 * self.ob_cost
+        )
+        rad = 1
+        self.start_coords = (
+            (self.coords * self.start_map.expand_as(self.coords))
+            .sum(dim=2)
+            .sum(dim=2)
+            .squeeze()
+        )
+        node_coords = self.start_coords
+        self.goal_coords = (
+            (self.coords * self.goal_map.expand_as(self.coords))
+            .sum(dim=2)
+            .sum(dim=2)
+            .squeeze()
+        )
+        self.max_steps = 4 * int(
+            torch.sqrt(
+                ((self.start_coords - self.goal_coords) ** 2).sum() + 1e-6
+            ).item()
+        )
+        while not_done:
+            ymin, ymax, xmin, xmax = self.safe_roi_2d(
+                node_coords[0] - rad,
+                node_coords[0] + rad + 1,
+                node_coords[1] - rad,
+                node_coords[1] + rad + 1,
+            )
+            if (
+                (ymin - 1 > 0)
+                and (xmin - 1 > 0)
+                and (ymax + 1 < self.height)
+                and (xmax + 1 < self.width)
+            ):
+                n2c = self.neights2channels(
+                    self.g_map[:, :, ymin - 1:ymax + 1, xmin - 1:xmax + 1]
+                )
+                self.g_map[:, :, ymin:ymax, xmin:xmax] = torch.min(
+                    self.g_map[:, :, ymin:ymax, xmin:xmax].clone(),
+                    (n2c + c_map[:, :, ymin:ymax, xmin:xmax]).min(
+                        dim=1, keepdim=True
+                    )[0],
+                )
+                self.close_list_map[:, :, ymin:ymax, xmin:xmax] = torch.max(
+                    self.close_list_map[:, :, ymin:ymax, xmin:xmax],
+                    self.open_list_map[:, :, ymin:ymax, xmin:xmax],
+                )
+                self.open_list_map[:, :, ymin:ymax, xmin:xmax] = F.relu(
+                    F.max_pool2d(
+                        self.open_list_map[
+                            :, :, ymin - 1:ymax + 1, xmin - 1:xmax + 1
+                        ],
+                        3,
+                        stride=1,
+                        padding=0,
+                    )
+                    - self.close_list_map[:, :, ymin:ymax, xmin:xmax]
+                    - self.obstacles[:, :, ymin:ymax, xmin:xmax]
+                )
+            else:
+                self.g_map = torch.min(
+                    self.g_map,
+                    (
+                        self.neights2channels(
+                            F.pad(self.g_map, (1, 1, 1, 1), "replicate")
+                        )
+                        + c_map
+                    ).min(dim=1, keepdim=True)[0],
+                )
+                self.close_list_map = torch.max(
+                    self.close_list_map, self.open_list_map
+                )
+                self.open_list_map = F.relu(
+                    F.max_pool2d(self.open_list_map, 3, stride=1, padding=1)
+                    - self.close_list_map
+                    - self.obstacles
+                )
+            step += 1
+            if step >= self.max_steps:
+                stopped_by_max_iter = True
+                break
+            not_done = (
+                self.close_list_map.view(-1)[goal_idx].item() < 1.0
+            ) or (self.g_map.view(-1)[goal_idx].item() >= 0.1 * self.inf)
+            rad += 1
+        if not stopped_by_max_iter:
+            for i in range(additional_steps):
+                # now propagating beyond the start point
+                self.g_map = torch.min(
+                    self.g_map,
+                    (
+                        self.neights2channels(
+                            F.pad(self.g_map, (1, 1, 1, 1), "replicate")
+                        )
+                        + c_map
+                    ).min(dim=1, keepdim=True)[0],
+                )
+                self.close_list_map = torch.max(
+                    self.close_list_map, self.open_list_map
+                )
+                self.open_list_map = F.relu(
+                    F.max_pool2d(self.open_list_map, 3, stride=1, padding=1)
+                    - self.close_list_map
+                    - self.obstacles
+                )
+        if return_path:
+            out_path, cost = self.reconstruct_path()
+            return out_path, cost
+        return
min=0, max=self.ob_cost
+        )
+
+    def propagate_traversal(self, node_coords, close, g, coords):
+        ymin, ymax, xmin, xmax = self.safe_roi_2d(
+            node_coords[0] - 1,
+            node_coords[0] + 2,
+            node_coords[1] - 1,
+            node_coords[1] + 2,
+        )
+        mask = close[:, :, ymin:ymax, xmin:xmax] > 0
+        mask[
+            :, :, f2ind(node_coords, 0) - ymin, f2ind(node_coords, 1) - xmin
+        ] = 0
+        mask = mask > 0
+        current_g_cost = g[:, :, ymin:ymax, xmin:xmax][mask].clone()
+        if len(current_g_cost.view(-1)) == 0:
+            # we are essentially surrounded by obstacles,
+            # but still need to output something
+            mask = torch.relu(
+                1.0 - self.been_there[:, :, ymin:ymax, xmin:xmax]
+            )
+            mask[
+                :,
+                :,
+                f2ind(node_coords, 0) - ymin,
+                f2ind(node_coords, 1) - xmin,
+            ] = 0
+            mask = mask > 0
+            current_g_cost = g[:, :, ymin:ymax, xmin:xmax][mask].clone()
+        if len(current_g_cost.view(-1)) > 1:
+            # jitter the costs slightly to break ties between neighbors
+            current_g_cost = current_g_cost - torch.min(current_g_cost).item()
+            current_g_cost = current_g_cost + 0.41 * torch.randperm(
+                len(current_g_cost),
+                dtype=torch.float32,
+                device=torch.device("cpu"),
+            ) / (len(current_g_cost))
+        coords_roi = coords[:, :, ymin:ymax, xmin:xmax]
+        out = self.argmin(
+            current_g_cost, coords_roi[mask.expand_as(coords_roi)]
+        )
+        return out
+
+    def get_clean_costmap_and_goodmask(self):
+        good_mask = 1 - F.max_pool2d(self.obstacles, 3, stride=1, padding=1)
+        costmap = self.g_map
+        obstacle_cost_corrected = 10000.0
+        sampling_map = torch.clamp(costmap, min=0, max=obstacle_cost_corrected)
+        return sampling_map, good_mask
+
+    def reconstruct_path(self):
+        out_path = []
+        goal_coords = self.goal_coords.cpu()
+        start_coords = self.start_coords.cpu()
+
+        cost = self.g_map[:, :, f2ind(goal_coords, 0), f2ind(goal_coords, 1)]
+        # Traverse from the goal back to the start, greedily stepping to the
+        # cheapest not-yet-visited neighbor
+        done = False
+        node_coords = goal_coords.cpu()
+        out_path.append(node_coords)
+        self.been_there = 0 * self.been_there.cpu()
+        self.been_there[
+            :, :, f2ind(node_coords, 0), f2ind(node_coords, 1)
+        ] = 1.0
+        self.close_list_map = self.close_list_map.cpu()
+        self.g_map = self.g_map.cpu()
+        self.coords = self.coords.cpu()
+        count1 = 0
+        while not done:
+            node_coords = self.propagate_traversal(
+                node_coords, self.close_list_map, self.g_map, self.coords
+            )
+            self.been_there[
+                :, :, f2ind(node_coords, 0), f2ind(node_coords, 1)
+            ] = 1.0
+            if torch.norm(node_coords - out_path[-1], 2).item() < 0.3:
+                y = node_coords.flatten()[0].long()
+                x = node_coords.flatten()[1].long()
+                print(self.g_map[0, 0, y - 2:y + 3, x - 2:x + 3])
+                print("loop in out_path", node_coords)
+                raise ValueError("loop in out_path")
+            out_path.append(node_coords)
+            done = torch.norm(node_coords - start_coords.cpu(), 2).item() < 0.3
+            count1 += 1
+            if count1 > 250:
+                break
+        return out_path, cost
diff --git a/baselines/slambased/reprojection.py b/baselines/slambased/reprojection.py
new file mode 100644
index 0000000000000000000000000000000000000000..566b47745f04e0445a59eb2d3583157988818f19
--- /dev/null
+++ b/baselines/slambased/reprojection.py
@@ -0,0 +1,289 @@
+import numpy as np
+import torch
+from math import ceil, floor
+
+
+
+def p_zx(p):
+    return p[(0, 2), 3]
+
+
+def get_map_size_in_cells(map_size_in_meters, cell_size_in_meters):
+    return int(ceil(map_size_in_meters / cell_size_in_meters)) + 1
+
+
+def get_pos_diff(p_init, p_fin):
+    return p_zx(p_fin) - p_zx(p_init)
+
+
+def get_distance(p_init, p_fin):
+    return torch.norm(get_pos_diff(p_init, p_fin))
+
+
+def get_pos_diffs(ps):
+    return ps[1:, (0, 2), 3] - ps[: (ps.size(0) - 1), (0, 2), 3]
+
+
+def 
angle_to_pi_2_minus_pi_2(angle):
+    if angle < -np.pi:
+        angle = 2.0 * np.pi + angle
+    if angle > np.pi:
+        angle = -2.0 * np.pi + angle
+    return angle
+
+
+def get_direction(p_init, p_fin, ang_th=0.2, pos_th=0.1):
+    pos_diff = get_pos_diff(p_init, p_fin)
+    if torch.norm(pos_diff, 2).item() < pos_th:
+        return 0
+    else:
+        needed_angle = torch.atan2(pos_diff[1], pos_diff[0])
+        current_angle = torch.atan2(p_init[2, 0], p_init[0, 0])
+        to_rotate = angle_to_pi_2_minus_pi_2(
+            -np.pi / 2.0 + needed_angle - current_angle
+        )
+        if torch.abs(to_rotate).item() < ang_th:
+            return 0
+        return to_rotate
+
+
+def reproject_local_to_global(xyz_local, p):
+    device = xyz_local.device
+    num, dim = xyz_local.size()
+    if dim == 3:
+        xyz = torch.cat(
+            [
+                xyz_local,
+                torch.ones((num, 1), dtype=torch.float32, device=device),
+            ],
+            dim=1,
+        )
+    elif dim == 4:
+        xyz = xyz_local
+    else:
+        raise ValueError(
+            "3d point cloud dim is neither 3 nor 4 (homogeneous)"
+        )
+    # apply the 4x4 transform to the homogeneous points
+    xyz_global = torch.mm(p.squeeze(), xyz.t())
+    return xyz_global.t()
+
+
+def project2d_pcl_into_worldmap(zx, map_size, cell_size):
+    device = zx.device
+    shift = int(floor(get_map_size_in_cells(map_size, cell_size) / 2.0))
+    topdown2index = torch.tensor(
+        [[1.0 / cell_size, 0, shift], [0, 1.0 / cell_size, shift], [0, 0, 1]],
+        device=device,
+    )
+    world_coords_h = torch.cat(
+        [zx.view(-1, 2), torch.ones((len(zx), 1), device=device)], dim=1
+    )
+    world_coords = torch.mm(topdown2index, world_coords_h.t())
+    return world_coords.t()[:, :2]
+
+
+def get_pose2d(poses6d):
+    poses6d = poses6d.view(-1, 4, 4)
+    poses2d = poses6d[:, (0, 2)]
+    poses2d = poses2d[:, :, (0, 2, 3)]
+    return poses2d
+
+
+def get_rotation_matrix(angle_in_radians):
+    angle_in_radians = angle_in_radians.view(-1, 1, 1)
+    sin_a = torch.sin(angle_in_radians)
+    cos_a = torch.cos(angle_in_radians)
+    a1x = torch.cat([cos_a, sin_a], dim=2)
+    a2x = torch.cat([-sin_a, cos_a], dim=2)
+    transform = torch.cat([a1x, a2x], dim=1)
+    return transform
+
+
+def normalize_zx_ori(p):
+    p2d = get_pose2d(p)
+    norms = torch.norm(p2d[:, 0, :2], dim=1).view(-1, 1, 1)
+    out = torch.cat(
+        [
+            torch.cat(
+                [p[:, :3, :3] / norms.expand(p.size(0), 3, 3), p[:, 3:, :3]],
+                dim=1,
+            ),
+            p[:, :, 3:],
+        ],
+        dim=2,
+    )
+    return out
+
+
+def add_rot_wps(p):
+    planned_tps_norm = normalize_zx_ori(p)
+    pos_diffs = get_pos_diffs(planned_tps_norm)
+
+    angles = torch.atan2(pos_diffs[:, 1], pos_diffs[:, 0])
+    rotmats = get_rotation_matrix(angles)
+    planned_tps_norm[:p.size(0) - 1, 0, 0] = rotmats[:, 0, 0]
+    planned_tps_norm[:p.size(0) - 1, 0, 2] = rotmats[:, 0, 1]
+    planned_tps_norm[:p.size(0) - 1, 2, 0] = rotmats[:, 1, 0]
+    planned_tps_norm[:p.size(0) - 1, 2, 2] = rotmats[:, 1, 1]
+
+    planned_points2 = planned_tps_norm.clone()
+
+    planned_points2[1:, 0, 0] = planned_tps_norm[:p.size(0) - 1, 0, 0]
+    planned_points2[1:, 0, 2] = planned_tps_norm[:p.size(0) - 1, 0, 2]
+    planned_points2[1:, 2, 0] = planned_tps_norm[:p.size(0) - 1, 2, 0]
+    planned_points2[1:, 2, 2] = planned_tps_norm[:p.size(0) - 1, 2, 2]
+    out = torch.stack(
+        (planned_points2.unsqueeze(0), planned_tps_norm.unsqueeze(0)), dim=0
+    ).squeeze()
+    out = out.permute(1, 0, 2, 3).contiguous().view(-1, 4, 4)
+    return out
+
+
+def planned_path2tps(path, cell_size, map_size, agent_h, add_rot=False):
+    '''Path is a list of 2d coordinates from the planner, in map cells.
+    tp is a trajectory pose: a 4x4 matrix in the same
+    format as in the localization module
+    '''
+    path = torch.cat(path).view(-1, 2)
+    # path is now an (N, 2) tensor of (row, col) map cells
+    num_pts = len(path)
+    planned_tps = torch.eye(4).unsqueeze(0).repeat((num_pts, 1, 1))
+    planned_tps[:, 0, 3] = path[:, 1]  # switch back x and z
+    planned_tps[:, 1, 3] = agent_h
+    planned_tps[:, 2, 3] = path[:, 0]  # switch back x and z
+    shift = int(floor(get_map_size_in_cells(map_size, cell_size) / 2.0))
+    planned_tps[:, 0, 3] = planned_tps[:, 0, 3] - shift
+    planned_tps[:, 2, 3] = planned_tps[:, 2, 3] - shift
+    p = torch.tensor(
+        [
+            [1.0 / cell_size, 0, 0, 0],
+            [0, 1.0 / cell_size, 0, 0],
+            [0, 0, 1.0 / cell_size, 0],
+            [0, 0, 0, 1],
+        ]
+    )
+    planned_tps = torch.bmm(
+        p.inverse().unsqueeze(0).expand(num_pts, 4, 4), planned_tps
+    )
+    if add_rot:
+        return add_rot_wps(planned_tps)
+    return planned_tps
+
+
+def habitat_goalpos_to_tp(ro_phi, p_curr):
+    '''Convert distance and azimuth to a
+    trajectory pose: a 4x4 matrix in the same
+    format as in the localization module
+    '''
+    device = ro_phi.device
+    offset = torch.tensor(
+        [
+            -ro_phi[0] * torch.sin(ro_phi[1]),
+            0,
+            ro_phi[0] * torch.cos(ro_phi[1]),
+        ]
+    ).to(device)
+    if p_curr.size(1) == 3:
+        p_curr = homogenize_p(p_curr)
+    goal_tp = torch.mm(
+        p_curr.to(device),
+        torch.cat(
+            [
+                offset
+                * torch.tensor(
+                    [1.0, 1.0, 1.0], dtype=torch.float32, device=device
+                ),
+                torch.tensor([1.0], device=device),
+            ]
+        ).reshape(4, 1),
+    )
+    return goal_tp
+
+
+def habitat_goalpos_to_mapgoal_pos(offset, p_curr, cell_size, map_size):
+    '''Convert distance and azimuth to
+    map cell coordinates
+    '''
+    device = offset.device
+    goal_tp = habitat_goalpos_to_tp(offset, p_curr)
+    goal_tp1 = torch.eye(4).to(device)
+    goal_tp1[:, 3:] = goal_tp
+    projected_p = project_tps_into_worldmap(
+        goal_tp1.view(1, 4, 4), cell_size, map_size
+    )
+    return projected_p
+
+
+
+def homogenize_p(tps):
+    device = tps.device
+    tps = tps.view(-1, 3, 4)
+    return torch.cat(
+        [
+            tps.float(),
+            torch.tensor([0, 0, 0, 1.0])
+            .view(1, 1, 4)
+            .expand(tps.size(0), 1, 4)
+            .to(device),
+        ],
+        dim=1,
+    )
+
+
+def project_tps_into_worldmap(tps, cell_size, map_size, do_floor=True):
+    '''Convert 4x4 pose matrices (trajectory poses) to
+    map cell coordinates
+    '''
+    if len(tps) == 0:
+        return []
+    if isinstance(tps, list):
+        return []
+    device = tps.device
+    topdown_p = torch.tensor([[1.0, 0, 0, 0], [0, 0, 1.0, 0]]).to(device)
+    world_coords = torch.bmm(
+        topdown_p.view(1, 2, 4).expand(tps.size(0), 2, 4),
+        tps[:, :, 3:].view(-1, 4, 1),
+    )
+    shift = int(floor(get_map_size_in_cells(map_size, cell_size) / 2.0))
+    topdown2index = torch.tensor(
+        [[1.0 / cell_size, 0, shift], [0, 1.0 / cell_size, shift], [0, 0, 1]]
+    ).to(device)
+    world_coords_h = torch.cat(
+        [world_coords, torch.ones((len(world_coords), 1, 1)).to(device)], dim=1
+    )
+    world_coords = torch.bmm(
+        topdown2index.unsqueeze(0).expand(world_coords_h.size(0), 3, 3),
+        world_coords_h,
+    )[:, :2, 0]
+    if do_floor:
+        return (
+            torch.floor(world_coords.flip(1)) + 1
+        )  # to return cells in reversed (z, x) order
+    return world_coords.flip(1)
+
+
+def project_tps_into_worldmap_numpy(tps, slam_to_world, cell_size, map_size):
+    if len(tps) == 0:
+        return []
+    if isinstance(tps, list):
+        return []
+    # tps is expected in [n,4,4] format
+    topdown_p = np.array([[slam_to_world, 0, 0, 0], [0, 0, slam_to_world, 0]])
+    try:
+        world_coords = np.matmul(
+            topdown_p.reshape(1, 2, 4), tps[:, :, 3:].reshape(-1, 4, 1)
+        )
+    except BaseException:
+        return []
+    shift = int(floor(get_map_size_in_cells(map_size, 
cell_size) / 2.0))
+    topdown2index = np.array(
+        [[1.0 / cell_size, 0, shift], [0, 1.0 / cell_size, shift], [0, 0, 1]]
+    )
+    world_coords_h = np.concatenate(
+        [world_coords, np.ones((len(world_coords), 1, 1))], axis=1
+    )
+    world_coords = np.matmul(topdown2index, world_coords_h)[:, :2, 0]
+    return (
+        world_coords[:, ::-1].astype(np.int32) + 1
+    )  # to return cells in reversed (z, x) order
diff --git a/baselines/slambased/utils.py b/baselines/slambased/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b39671c7b38c4cc84e9f65a855c48cd283b2ed8
--- /dev/null
+++ b/baselines/slambased/utils.py
@@ -0,0 +1,44 @@
+import numpy as np
+import torch
+import time
+from PIL import Image
+
+
+def generate_2dgrid(h, w, centered=False):
+    if centered:
+        x = torch.linspace(-w / 2 + 1, w / 2, w)
+        y = torch.linspace(-h / 2 + 1, h / 2, h)
+    else:
+        x = torch.linspace(0, w - 1, w)
+        y = torch.linspace(0, h - 1, h)
+    grid2d = torch.stack(
+        [y.repeat(w, 1).t().contiguous().view(-1), x.repeat(h)], 1
+    )
+    return grid2d.view(1, h, w, 2).permute(0, 3, 1, 2)
+
+
+def str2bool(v):
+    if v.lower() in ("yes", "true", "t", "y", "1"):
+        return True
+    if v.lower() in ("no", "false", "f", "n", "0"):
+        return False
+    raise ValueError("Cannot interpret {} as a boolean".format(v))
+
+
+def resize_pil(np_img, size=128):
+    im1 = Image.fromarray(np_img)
+    im1.thumbnail((size, size))
+    return np.array(im1)
+
+
+def find_map_size(h, w):
+    map_size_in_meters = int(0.1 * 3 * max(h, w))
+    if map_size_in_meters % 10 != 0:
+        map_size_in_meters = map_size_in_meters + (
+            10 - (map_size_in_meters % 10)
+        )
+    return map_size_in_meters
+
+
+def gettimestr():
+    return time.strftime("%Y-%m-%d--%H_%M_%S", time.gmtime())
diff --git a/configs/tasks/pointnav_rgbd.yaml b/configs/tasks/pointnav_rgbd.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9635a93b002a5af3e0b8205663f5b1614ba6dbb9
--- /dev/null
+++ b/configs/tasks/pointnav_rgbd.yaml
@@ -0,0 +1,24 @@
+ENVIRONMENT:
+  MAX_EPISODE_STEPS: 500
+SIMULATOR:
+  AGENT_0:
+    SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR']
+  HABITAT_SIM_V0:
+    GPU_DEVICE_ID: 0
+  RGB_SENSOR:
+    WIDTH: 256
+    HEIGHT: 256
+  DEPTH_SENSOR:
+    WIDTH: 256
+    HEIGHT: 256
+TASK:
+  TYPE: Nav-v0
+  SUCCESS_DISTANCE: 0.2
+  SENSORS: ['POINTGOAL_SENSOR']
+  POINTGOAL_SENSOR:
+    TYPE: PointGoalSensor
+    GOAL_FORMAT: POLAR
+  MEASUREMENTS: ['SPL']
+  SPL:
+    TYPE: SPL
+    SUCCESS_DISTANCE: 0.2
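
A few illustrative sketches follow; they are not part of the diff. First, the cell-to-pose conventions in reprojection.py (the half-map shift, the 1/cell_size scaling, and the (z, x) flips) are easy to get wrong, so a round-trip through planned_path2tps and project_tps_into_worldmap is a quick sanity check. This is a minimal sketch assuming the files above are applied and importable as baselines.slambased.*; the cell size, map size, and waypoint values are made up for illustration.

import torch

from baselines.slambased.reprojection import (
    planned_path2tps,
    project_tps_into_worldmap,
)

cell_size = 0.1  # meters per map cell (illustrative value)
map_size = 40.0  # map side length in meters (illustrative value)
agent_h = 1.25   # agent/camera height written into the y translation

# Two hypothetical planner waypoints, each a (1, 2) tensor of (row, col) cells
path = [torch.tensor([[210.0, 215.0]]), torch.tensor([[211.0, 216.0]])]

tps = planned_path2tps(path, cell_size, map_size, agent_h)  # (2, 4, 4) poses
cells = project_tps_into_worldmap(tps, cell_size, map_size, do_floor=False)
print(cells)  # ~[[210., 215.], [211., 216.]] - the input cells, up to float
              # rounding; with do_floor=True (the default) the result is
              # additionally floored and shifted by +1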
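
Second, despite its name, angle_to_pi_2_minus_pi_2 (aliased as norm_ang in slam_agents.py) wraps an angle into [-pi, pi], not (-pi/2, pi/2), and since it applies at most one correction it only handles inputs within one extra turn:

import numpy as np

from baselines.slambased.reprojection import angle_to_pi_2_minus_pi_2

print(angle_to_pi_2_minus_pi_2(3 * np.pi / 2))   # -1.5707..., i.e. -pi/2
print(angle_to_pi_2_minus_pi_2(-3 * np.pi / 2))  #  1.5707..., i.e.  pi/2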
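
Finally, the coords tensor the planner consumes is built by generate_2dgrid from utils.py: channel 0 holds the row (y) index and channel 1 the column (x) index, which is why node coordinates are treated as (row, col) pairs throughout the planner. A tiny standalone check:

from baselines.slambased.utils import generate_2dgrid

grid = generate_2dgrid(3, 4)  # shape (1, 2, 3, 4)
print(grid[0, 0])  # channel 0: row index, constant within each row
print(grid[0, 1])  # channel 1: column index, constant within each column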