diff --git a/README.md b/README.md index b8ad9aa0a28845b3160dd23a21c701c0c33c58cc..b9025913c8d6fcae30b5ed9a3de2cc9ee3b40e9c 100644 --- a/README.md +++ b/README.md @@ -159,6 +159,7 @@ Download the Habitat related Gibson dataset following the instructions [here](ht | --- | --- | --- | --- | --- | --- | | [Point goal navigation](https://arxiv.org/abs/1807.06757) | Gibson | [pointnav_gibson_v1.zip](https://dl.fbaipublicfiles.com/habitat/data/datasets/pointnav/gibson/v1/pointnav_gibson_v1.zip) | `data/datasets/pointnav/gibson/v1/` | [`datasets/pointnav/gibson.yaml`](configs/datasets/pointnav/gibson.yaml) | 385 MB | | [Point goal navigation](https://arxiv.org/abs/1807.06757) | MatterPort3D | [pointnav_mp3d_v1.zip](https://dl.fbaipublicfiles.com/habitat/data/datasets/pointnav/mp3d/v1/pointnav_mp3d_v1.zip) | `data/datasets/pointnav/mp3d/v1/` | [`datasets/pointnav/mp3d.yaml`](configs/datasets/pointnav/mp3d.yaml) | 400 MB | +| Object goal navigation | MatterPort3D | [objectnav_mp3d_v0.zip](https://dl.fbaipublicfiles.com/habitat/data/datasets/objectnav/m3d/v0/objectnav_mp3d_v0.zip) | `data/datasets/objectnav/mp3d/v0/` | [`datasets/objectnav/mp3d.yaml`](configs/datasets/objectnav/mp3d.yaml) | 4 GB | | [Embodied Question Answering](https://embodiedqa.org/) | MatterPort3D | [eqa_mp3d_v1.zip](https://dl.fbaipublicfiles.com/habitat/data/datasets/eqa/mp3d/v1/eqa_mp3d_v1.zip) | `data/datasets/eqa/mp3d/v1/` | [`datasets/eqa/mp3d.yaml`](configs/datasets/eqa/mp3d.yaml) | 44 MB | | [Visual Language Navigation](https://bringmeaspoon.org/) | MatterPort3D | [vln_r2r_mp3d_v1.zip](https://dl.fbaipublicfiles.com/habitat/data/datasets/vln/mp3d/r2r/v1/vln_r2r_mp3d_v1.zip) | `data/datasets/vln/mp3d/r2r/v1` | [`datasets/vln/mp3d_r2r.yaml`](configs/datasets/vln/mp3d_r2r.yaml) | 2.7 MB | diff --git a/configs/datasets/objectnav/mp3d.yaml b/configs/datasets/objectnav/mp3d.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1bb50e7bdf245bb4130c57607b411d291bd60809 --- /dev/null 
+++ b/configs/datasets/objectnav/mp3d.yaml @@ -0,0 +1,4 @@ +DATASET: + TYPE: ObjectNav-v1 + SPLIT: train + DATA_PATH: data/datasets/objectnav/mp3d/v0/{split}/{split}.json.gz diff --git a/configs/tasks/obj_nav_mp3d.yaml b/configs/tasks/obj_nav_mp3d.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bdf61f0288ed67c516a28e2f7334a93056c9b63d --- /dev/null +++ b/configs/tasks/obj_nav_mp3d.yaml @@ -0,0 +1,47 @@ +ENVIRONMENT: + MAX_EPISODE_STEPS: 500 +SIMULATOR: + TURN_ANGLE: 30 + TILT_ANGLE: 30 + AGENT_0: + SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR'] + HEIGHT: 0.88 + RADIUS: 0.2 + HABITAT_SIM_V0: + GPU_DEVICE_ID: 0 + SEMANTIC_SENSOR: + WIDTH: 640 + HEIGHT: 480 + HFOV: 79 + POSITION: [0, 0.88, 0] + RGB_SENSOR: + WIDTH: 640 + HEIGHT: 480 + HFOV: 79 + POSITION: [0, 0.88, 0] + DEPTH_SENSOR: + WIDTH: 640 + HEIGHT: 480 + HFOV: 79 + MIN_DEPTH: 0.5 + MAX_DEPTH: 5.0 + POSITION: [0, 0.88, 0] +TASK: + TYPE: ObjectNav-v1 + SUCCESS_DISTANCE: 0.1 + + SENSORS: ['OBJECTGOAL_SENSOR'] + GOAL_SENSOR_UUID: objectgoal + + MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL'] + SPL: + TYPE: SPL + DISTANCE_TO: VIEW_POINTS + SUCCESS_DISTANCE: 0.2 + +DATASET: + TYPE: ObjectNav-v1 + SPLIT: val + CONTENT_SCENES: [] + DATA_PATH: "data/datasets/objectnav/mp3d/v1/{split}/{split}.json.gz" + SCENES_DIR: "data/scene_datasets/" diff --git a/configs/tasks/objectnav_mp3d.yaml b/configs/tasks/objectnav_mp3d.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8811ba838a4d7fc8af95418b250041f0f6700332 --- /dev/null +++ b/configs/tasks/objectnav_mp3d.yaml @@ -0,0 +1,51 @@ +ENVIRONMENT: + MAX_EPISODE_STEPS: 500 +SIMULATOR: + TURN_ANGLE: 30 + TILT_ANGLE: 30 + ACTION_SPACE_CONFIG: "v1" + AGENT_0: + SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR'] + HEIGHT: 0.88 + RADIUS: 0.2 + HABITAT_SIM_V0: + GPU_DEVICE_ID: 0 + SEMANTIC_SENSOR: + WIDTH: 640 + HEIGHT: 480 + HFOV: 79 + POSITION: [0, 0.88, 0] + RGB_SENSOR: + WIDTH: 640 + HEIGHT: 480 + HFOV: 79 + POSITION: [0, 0.88, 0] + DEPTH_SENSOR: + WIDTH: 
640 + HEIGHT: 480 + HFOV: 79 + MIN_DEPTH: 0.5 + MAX_DEPTH: 5.0 + POSITION: [0, 0.88, 0] +TASK: + TYPE: ObjectNav-v1 + POSSIBLE_ACTIONS: ["STOP", "MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT", "LOOK_UP", "LOOK_DOWN"] + SUCCESS_DISTANCE: 0.1 + + SENSORS: ['OBJECTGOAL_SENSOR', 'COMPASS_SENSOR', 'GPS_SENSOR'] + GOAL_SENSOR_UUID: objectgoal + + MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL'] + SPL: + TYPE: SPL + DISTANCE_TO: VIEW_POINTS + SUCCESS_DISTANCE: 0.2 + DISTANCE_TO_GOAL: + DISTANCE_TO: VIEW_POINTS + +DATASET: + TYPE: ObjectNav-v1 + SPLIT: val + CONTENT_SCENES: [] + DATA_PATH: "data/datasets/objectnav/mp3d/v0/{split}/{split}.json.gz" + SCENES_DIR: "data/scene_datasets/" diff --git a/configs/tasks/pointnav.yaml b/configs/tasks/pointnav.yaml index bcf81eaec4bfe237c7aa0086d029f6a4655d6c76..0f60544a723184a5a3f8b2b5f5eb2092db6758a1 100644 --- a/configs/tasks/pointnav.yaml +++ b/configs/tasks/pointnav.yaml @@ -21,7 +21,7 @@ TASK: DIMENSIONALITY: 2 GOAL_SENSOR_UUID: pointgoal_with_gps_compass - MEASUREMENTS: ['SPL'] + MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL'] SPL: TYPE: SPL SUCCESS_DISTANCE: 0.2 diff --git a/configs/tasks/pointnav_gibson.yaml b/configs/tasks/pointnav_gibson.yaml index 52d5ae7082f61a44056cbc42fc57530c788f2fba..729c64c905d972c730761e1903b696096c654d33 100644 --- a/configs/tasks/pointnav_gibson.yaml +++ b/configs/tasks/pointnav_gibson.yaml @@ -21,7 +21,7 @@ TASK: DIMENSIONALITY: 2 GOAL_SENSOR_UUID: pointgoal_with_gps_compass - MEASUREMENTS: ['SPL'] + MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL'] SPL: TYPE: SPL SUCCESS_DISTANCE: 0.2 diff --git a/configs/tasks/pointnav_mp3d.yaml b/configs/tasks/pointnav_mp3d.yaml index 341cac7e6d1cedb7bba4c089f3f7a330993cf9a0..1c53200beeb88ba0f5389f71c0587fe14b80b62d 100644 --- a/configs/tasks/pointnav_mp3d.yaml +++ b/configs/tasks/pointnav_mp3d.yaml @@ -21,7 +21,7 @@ TASK: DIMENSIONALITY: 2 GOAL_SENSOR_UUID: pointgoal_with_gps_compass - MEASUREMENTS: ['SPL'] + MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL'] SPL: TYPE: SPL SUCCESS_DISTANCE: 
0.2 diff --git a/configs/tasks/pointnav_rgbd.yaml b/configs/tasks/pointnav_rgbd.yaml index e1660792c4cf41760081209397c26c0adf0c195a..5c74f537ddafb47edf2ec6bd04bfaf685c80d8f9 100644 --- a/configs/tasks/pointnav_rgbd.yaml +++ b/configs/tasks/pointnav_rgbd.yaml @@ -21,7 +21,7 @@ TASK: DIMENSIONALITY: 2 GOAL_SENSOR_UUID: pointgoal_with_gps_compass - MEASUREMENTS: ['SPL'] + MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL'] SPL: TYPE: SPL SUCCESS_DISTANCE: 0.2 diff --git a/configs/tasks/vln_r2r.yaml b/configs/tasks/vln_r2r.yaml index 41873216395a8109a5d091770c2caf883bd753f4..bdb4bbe67343257293be3ce3eeceaa8af961a930 100644 --- a/configs/tasks/vln_r2r.yaml +++ b/configs/tasks/vln_r2r.yaml @@ -21,7 +21,7 @@ TASK: SENSORS: ['INSTRUCTION_SENSOR'] INSTRUCTION_SENSOR_UUID: instruction POSSIBLE_ACTIONS: ['STOP', 'MOVE_FORWARD', 'TURN_LEFT', 'TURN_RIGHT'] - MEASUREMENTS: ['SPL'] + MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL'] SPL: TYPE: SPL SUCCESS_DISTANCE: 3.0 diff --git a/configs/test/habitat_all_sensors_test.yaml b/configs/test/habitat_all_sensors_test.yaml index d840f48636412636c4b4af25affe78dc1fe455ed..a603475b7f421ecb4a2f0d4c3817b29d2ef5c049 100644 --- a/configs/test/habitat_all_sensors_test.yaml +++ b/configs/test/habitat_all_sensors_test.yaml @@ -25,7 +25,7 @@ TASK: DIMENSIONALITY: 2 GOAL_SENSOR_UUID: pointgoal_with_gps_compass - MEASUREMENTS: ['SPL'] + MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL'] SPL: TYPE: SPL SUCCESS_DISTANCE: 0.2 diff --git a/configs/test/habitat_mp3d_object_nav_test.yaml b/configs/test/habitat_mp3d_object_nav_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..163f1a58bcc6a3422c373aa4d90fe4737a7c2957 --- /dev/null +++ b/configs/test/habitat_mp3d_object_nav_test.yaml @@ -0,0 +1,51 @@ +ENVIRONMENT: + MAX_EPISODE_STEPS: 500 +SIMULATOR: + TURN_ANGLE: 30 + TILT_ANGLE: 30 + ACTION_SPACE_CONFIG: "v1" + AGENT_0: + SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR'] + HEIGHT: 0.88 + RADIUS: 0.2 + HABITAT_SIM_V0: + GPU_DEVICE_ID: 0 + SEMANTIC_SENSOR: + WIDTH: 
640 + HEIGHT: 480 + HFOV: 79 + POSITION: [0, 0.88, 0] + RGB_SENSOR: + WIDTH: 640 + HEIGHT: 480 + HFOV: 79 + POSITION: [0, 0.88, 0] + DEPTH_SENSOR: + WIDTH: 640 + HEIGHT: 480 + HFOV: 79 + MIN_DEPTH: 0.5 + MAX_DEPTH: 5.0 + POSITION: [0, 0.88, 0] +TASK: + TYPE: ObjectNav-v1 + POSSIBLE_ACTIONS: ["STOP", "MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT", "LOOK_UP", "LOOK_DOWN"] + SUCCESS_DISTANCE: 0.1 + + SENSORS: ['OBJECTGOAL_SENSOR', 'COMPASS_SENSOR', 'GPS_SENSOR'] + GOAL_SENSOR_UUID: objectgoal + + MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL'] + SPL: + TYPE: SPL + DISTANCE_TO: VIEW_POINTS + SUCCESS_DISTANCE: 0.2 + DISTANCE_TO_GOAL: + DISTANCE_TO: VIEW_POINTS + +DATASET: + TYPE: ObjectNav-v1 + SPLIT: mini_val + CONTENT_SCENES: [] + DATA_PATH: "data/datasets/objectnav/mp3d/v0/{split}/{split}.json.gz" + SCENES_DIR: "data/scene_datasets/" diff --git a/configs/test/habitat_r2r_vln_test.yaml b/configs/test/habitat_r2r_vln_test.yaml index 7e6fd8511f22274f76c4ab3efbd7f8f7ada588e7..e202568a4bed01f20f93fd0119ecd6b698d7c959 100644 --- a/configs/test/habitat_r2r_vln_test.yaml +++ b/configs/test/habitat_r2r_vln_test.yaml @@ -21,7 +21,7 @@ TASK: DIMENSIONALITY: 2 GOAL_SENSOR_UUID: pointgoal_with_gps_compass INSTRUCTION_SENSOR_UUID: instruction - MEASUREMENTS: ['SPL'] + MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL'] SPL: TYPE: SPL SUCCESS_DISTANCE: 3.0 diff --git a/examples/shortest_path_follower_example.py b/examples/shortest_path_follower_example.py index 36be4c0ea94d0ad561d07eaabee567ea4a4b467a..7b2262aed1e60c2d56d588e562c7f349d3277619 100644 --- a/examples/shortest_path_follower_example.py +++ b/examples/shortest_path_follower_example.py @@ -95,7 +95,7 @@ def shortest_path_example(mode): observations, reward, done, info = env.step(best_action) im = observations["rgb"] top_down_map = draw_top_down_map( - info, observations["heading"], im.shape[0] + info, observations["heading"][0], im.shape[0] ) output_im = np.concatenate((im, top_down_map), axis=1) images.append(output_im) diff --git 
a/habitat/config/default.py b/habitat/config/default.py index 044c49dcd87afdf108ea5e0e83b7b03319b9af6e..af732c386b93f16c06bcfad26f50e73898c93482 100644 --- a/habitat/config/default.py +++ b/habitat/config/default.py @@ -94,6 +94,13 @@ _C.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.TYPE = ( "PointGoalWithGPSCompassSensor" ) # ----------------------------------------------------------------------------- +# OBJECTGOAL SENSOR +# ----------------------------------------------------------------------------- +_C.TASK.OBJECTGOAL_SENSOR = CN() +_C.TASK.OBJECTGOAL_SENSOR.TYPE = "ObjectGoalSensor" +_C.TASK.OBJECTGOAL_SENSOR.GOAL_SPEC = "TASK_CATEGORY_ID" +_C.TASK.OBJECTGOAL_SENSOR.GOAL_SPEC_MAX_VAL = 50 +# ----------------------------------------------------------------------------- # HEADING SENSOR # ----------------------------------------------------------------------------- _C.TASK.HEADING_SENSOR = CN() @@ -130,13 +137,17 @@ _C.TASK.TOP_DOWN_MAP.MAX_EPISODE_STEPS = _C.ENVIRONMENT.MAX_EPISODE_STEPS _C.TASK.TOP_DOWN_MAP.MAP_PADDING = 3 _C.TASK.TOP_DOWN_MAP.NUM_TOPDOWN_MAP_SAMPLE_POINTS = 20000 _C.TASK.TOP_DOWN_MAP.MAP_RESOLUTION = 1250 -_C.TASK.TOP_DOWN_MAP.DRAW_SOURCE_AND_TARGET = True +_C.TASK.TOP_DOWN_MAP.DRAW_SOURCE = True _C.TASK.TOP_DOWN_MAP.DRAW_BORDER = True _C.TASK.TOP_DOWN_MAP.DRAW_SHORTEST_PATH = True _C.TASK.TOP_DOWN_MAP.FOG_OF_WAR = CN() _C.TASK.TOP_DOWN_MAP.FOG_OF_WAR.DRAW = True _C.TASK.TOP_DOWN_MAP.FOG_OF_WAR.VISIBILITY_DIST = 5.0 _C.TASK.TOP_DOWN_MAP.FOG_OF_WAR.FOV = 90 +_C.TASK.TOP_DOWN_MAP.DRAW_VIEW_POINTS = True +_C.TASK.TOP_DOWN_MAP.DRAW_GOAL_POSITIONS = True +# Axes aligned bounding boxes +_C.TASK.TOP_DOWN_MAP.DRAW_GOAL_AABBS = True # ----------------------------------------------------------------------------- # COLLISIONS MEASUREMENT # ----------------------------------------------------------------------------- @@ -173,6 +184,7 @@ _C.TASK.INSTRUCTION_SENSOR_UUID = "instruction" # 
----------------------------------------------------------------------------- _C.TASK.DISTANCE_TO_GOAL = CN() _C.TASK.DISTANCE_TO_GOAL.TYPE = "DistanceToGoal" +_C.TASK.DISTANCE_TO_GOAL.DISTANCE_TO = "POINT" # ----------------------------------------------------------------------------- # # ANSWER_ACCURACY MEASUREMENT # ----------------------------------------------------------------------------- @@ -210,8 +222,8 @@ _C.SIMULATOR.RGB_SENSOR.TYPE = "HabitatSimRGBSensor" # ----------------------------------------------------------------------------- _C.SIMULATOR.DEPTH_SENSOR = SIMULATOR_SENSOR.clone() _C.SIMULATOR.DEPTH_SENSOR.TYPE = "HabitatSimDepthSensor" -_C.SIMULATOR.DEPTH_SENSOR.MIN_DEPTH = 0 -_C.SIMULATOR.DEPTH_SENSOR.MAX_DEPTH = 10 +_C.SIMULATOR.DEPTH_SENSOR.MIN_DEPTH = 0.0 +_C.SIMULATOR.DEPTH_SENSOR.MAX_DEPTH = 10.0 _C.SIMULATOR.DEPTH_SENSOR.NORMALIZE_DEPTH = True # ----------------------------------------------------------------------------- # SEMANTIC SENSOR diff --git a/habitat/core/embodied_task.py b/habitat/core/embodied_task.py index aafc7d6a760df5aeb33e02fddc0a6d01f92522db..7a3813c022b354e674dd69714feee3d3789a97e4 100644 --- a/habitat/core/embodied_task.py +++ b/habitat/core/embodied_task.py @@ -8,7 +8,7 @@ r"""Implements tasks and measurements needed for training and benchmarking of """ from collections import OrderedDict -from typing import Any, Dict, Iterable, Optional, Type, Union +from typing import Any, Dict, Iterable, List, Optional, Type, Union import numpy as np @@ -175,6 +175,31 @@ class Measurements: """ return Metrics(self.measures) + def _get_measure_index(self, measure_name): + return list(self.measures.keys()).index(measure_name) + + def check_measure_dependencies( + self, measure_name: str, dependencies: List[str] + ): + r"""Checks that dependency measures are enabled and calculated before the measure + :param measure_name: a name of the measure which has dependencies. 
+ :param dependencies: a list of measure names that are required by + the measure. + :return: + """ + measure_index = self._get_measure_index(measure_name) + for dependency_measure in dependencies: + assert ( + dependency_measure in self.measures + ), f"""{measure_name} measure requires {dependency_measure} + listed in the measures list in the config.""" + + for dependency_measure in dependencies: + assert measure_index > self._get_measure_index( + dependency_measure + ), f"""{measure_name} measure requires to be listed after {dependency_measure} + in the measures list in the config.""" + + class EmbodiedTask: r"""Base class for embodied tasks. ``EmbodiedTask`` holds definition of diff --git a/habitat/core/utils.py b/habitat/core/utils.py index 7b4bb29944b7e5baa03164667d779c5bc162bbb3..ff07b92197899903eec2c82063ed8910415cca96 100644 --- a/habitat/core/utils.py +++ b/habitat/core/utils.py @@ -4,9 +4,24 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +import json from typing import List import numpy as np +import quaternion + +from habitat.utils.geometry_utils import quaternion_to_list + +# Internals from inner json library needed for patching functionality in +# DatasetFloatJSONEncoder. +try: + from _json import encode_basestring_ascii +except ImportError: + encode_basestring_ascii = None +try: + from _json import encode_basestring +except ImportError: + encode_basestring = None def tile_images(images: List[np.ndarray]) -> np.ndarray: @@ -92,3 +107,70 @@ def center_crop(obs, new_shape): obs = obs[top_left[0] : bottom_right[0], top_left[1] : bottom_right[1], :] return obs + + +class DatasetFloatJSONEncoder(json.JSONEncoder): + r"""JSON Encoder that sets a float precision for a space saving purpose and + encodes ndarray and quaternion. The encoder is compatible with JSON + version 2.0.9. 
+ """ + + def default(self, object): + # JSON doesn't support numpy ndarray and quaternion + if isinstance(object, np.ndarray): + return object.tolist() + if isinstance(object, np.quaternion): + return quaternion_to_list(object) + quaternion + return object.__dict__ + + # Overriding method to inject own `_repr` function for floats with needed + # precision. + def iterencode(self, o, _one_shot=False): + + if self.check_circular: + markers = {} + else: + markers = None + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + + def floatstr( + o, + allow_nan=self.allow_nan, + _repr=lambda x: format(x, ".5f"), + _inf=float("inf"), + _neginf=-float("inf"), + ): + if o != o: + text = "NaN" + elif o == _inf: + text = "Infinity" + elif o == _neginf: + text = "-Infinity" + else: + return _repr(o) + + if not allow_nan: + raise ValueError( + "Out of range float values are not JSON compliant: " + + repr(o) + ) + + return text + + _iterencode = json.encoder._make_iterencode( + markers, + self.default, + _encoder, + self.indent, + floatstr, + self.key_separator, + self.item_separator, + self.sort_keys, + self.skipkeys, + _one_shot, + ) + return _iterencode(o, 0) diff --git a/habitat/datasets/eqa/mp3d_eqa_dataset.py b/habitat/datasets/eqa/mp3d_eqa_dataset.py index 610aefe5ff4abef7843012d2a0e8ba9465e3209d..bf6b920211f98697650a98e0763348de3ac82969 100644 --- a/habitat/datasets/eqa/mp3d_eqa_dataset.py +++ b/habitat/datasets/eqa/mp3d_eqa_dataset.py @@ -15,7 +15,8 @@ from habitat.core.registry import registry from habitat.core.simulator import AgentState from habitat.datasets.utils import VocabDict, VocabFromText from habitat.tasks.eqa.eqa import EQAEpisode, QuestionData -from habitat.tasks.nav.nav import ObjectGoal, ShortestPathPoint +from habitat.tasks.nav.nav import ShortestPathPoint +from habitat.tasks.nav.object_nav_task import ObjectGoal EQA_MP3D_V1_VAL_EPISODE_COUNT = 1950 DEFAULT_SCENE_PATH_PREFIX = "data/scene_datasets/" diff --git 
a/habitat/datasets/object_nav/__init__.py b/habitat/datasets/object_nav/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c37139061ff67f50f50a814a7e41adefae81b901 --- /dev/null +++ b/habitat/datasets/object_nav/__init__.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from habitat.core.dataset import Dataset +from habitat.core.registry import registry + + +# TODO(akadian): This is a result of moving SimulatorActions away from core +# and into simulators specifically. As a result of that the connection points +# for our tasks and datasets for actions is coming from inside habitat-sim +# which makes it impossible for anyone to use habitat-api without having +# habitat-sim installed. In a future PR we will implement a base simulator +# action class which will be the connection point for tasks and datasets. +# Post that PR we would no longer need try register blocks. +def _try_register_objectnavdatasetv1(): + try: + from habitat.datasets.object_nav.object_nav_dataset import ( + ObjectNavDatasetV1, + ) + + has_pointnav = True + except ImportError as e: + has_pointnav = False + pointnav_import_error = e + + if has_pointnav: + from habitat.datasets.object_nav.object_nav_dataset import ( + ObjectNavDatasetV1, + ) + else: + + @registry.register_dataset(name="ObjectNav-v1") + class ObjectNavDatasetImportError(Dataset): + def __init__(self, *args, **kwargs): + raise pointnav_import_error diff --git a/habitat/datasets/object_nav/object_nav_dataset.py b/habitat/datasets/object_nav/object_nav_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..a595b26bdadf254969868feda27eefc4bd7fc8bb --- /dev/null +++ b/habitat/datasets/object_nav/object_nav_dataset.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. 
+# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import json +import os +from typing import Dict, List, Optional + +from habitat.core.registry import registry +from habitat.core.simulator import AgentState, ShortestPathPoint +from habitat.core.utils import DatasetFloatJSONEncoder +from habitat.datasets.pointnav.pointnav_dataset import ( + CONTENT_SCENES_PATH_FIELD, + DEFAULT_SCENE_PATH_PREFIX, + PointNavDatasetV1, +) +from habitat.tasks.nav.nav import NavigationEpisode +from habitat.tasks.nav.object_nav_task import ObjectGoal, ObjectViewLocation + + +@registry.register_dataset(name="ObjectNav-v1") +class ObjectNavDatasetV1(PointNavDatasetV1): + r"""Class inherited from PointNavDataset that loads Object Navigation dataset. + """ + category_to_task_category_id: Dict[str, int] + category_to_scene_annotation_category_id: Dict[str, int] + episodes: List[NavigationEpisode] + content_scenes_path: str = "{data_path}/content/{scene}.json.gz" + + def to_json(self) -> str: + result = DatasetFloatJSONEncoder().encode(self) + return result + + def from_json( + self, json_str: str, scenes_dir: Optional[str] = None + ) -> None: + deserialized = json.loads(json_str) + if CONTENT_SCENES_PATH_FIELD in deserialized: + self.content_scenes_path = deserialized[CONTENT_SCENES_PATH_FIELD] + + if "category_to_task_category_id" in deserialized: + self.category_to_task_category_id = deserialized[ + "category_to_task_category_id" + ] + + if "category_to_scene_annotation_category_id" in deserialized: + self.category_to_scene_annotation_category_id = deserialized[ + "category_to_scene_annotation_category_id" + ] + + if "category_to_mp3d_category_id" in deserialized: + self.category_to_scene_annotation_category_id = deserialized[ + "category_to_mp3d_category_id" + ] + + assert len(self.category_to_task_category_id) == len( + self.category_to_scene_annotation_category_id + ) + + assert 
set(self.category_to_task_category_id.keys()) == set( + self.category_to_scene_annotation_category_id.keys() + ), "category_to_task and category_to_mp3d must have the same keys" + + for episode in deserialized["episodes"]: + episode = NavigationEpisode(**episode) + + if scenes_dir is not None: + if episode.scene_id.startswith(DEFAULT_SCENE_PATH_PREFIX): + episode.scene_id = episode.scene_id[ + len(DEFAULT_SCENE_PATH_PREFIX) : + ] + + episode.scene_id = os.path.join(scenes_dir, episode.scene_id) + + for i in range(len(episode.goals)): + episode.goals[i] = ObjectGoal(**episode.goals[i]) + + for vidx, view in enumerate(episode.goals[i].view_points): + view_location = ObjectViewLocation(**view) + view_location.agent_state = AgentState( + **view_location.agent_state + ) + episode.goals[i].view_points[vidx] = view_location + + if episode.shortest_paths is not None: + for path in episode.shortest_paths: + for p_index, point in enumerate(path): + point = { + "action": point, + "rotation": None, + "position": None, + } + path[p_index] = ShortestPathPoint(**point) + + self.episodes.append(episode) + + for i, ep in enumerate(self.episodes): + ep.episode_id = str(i) diff --git a/habitat/datasets/pointnav/pointnav_dataset.py b/habitat/datasets/pointnav/pointnav_dataset.py index 3245e5fee133acca30ca1ebda2957c4dbdd9fd43..2b6a66428645173865314f27a59debfb1c54a5f1 100644 --- a/habitat/datasets/pointnav/pointnav_dataset.py +++ b/habitat/datasets/pointnav/pointnav_dataset.py @@ -37,12 +37,12 @@ class PointNavDatasetV1(Dataset): config.DATA_PATH.format(split=config.SPLIT) ) and os.path.exists(config.SCENES_DIR) - @staticmethod - def get_scenes_to_load(config: Config) -> List[str]: + @classmethod + def get_scenes_to_load(cls, config: Config) -> List[str]: r"""Return list of scene ids for which dataset has separate files with episodes. 
""" - assert PointNavDatasetV1.check_config_paths_exist(config) + assert cls.check_config_paths_exist(config) dataset_dir = os.path.dirname( config.DATA_PATH.format(split=config.SPLIT) ) @@ -50,8 +50,8 @@ class PointNavDatasetV1(Dataset): cfg = config.clone() cfg.defrost() cfg.CONTENT_SCENES = [] - dataset = PointNavDatasetV1(cfg) - return PointNavDatasetV1._get_scenes_from_folder( + dataset = cls(cfg) + return cls._get_scenes_from_folder( content_scenes_path=dataset.content_scenes_path, dataset_dir=dataset_dir, ) @@ -86,7 +86,7 @@ class PointNavDatasetV1(Dataset): dataset_dir = os.path.dirname(datasetfile_path) scenes = config.CONTENT_SCENES if ALL_SCENES_MASK in scenes: - scenes = PointNavDatasetV1._get_scenes_from_folder( + scenes = self.__class__._get_scenes_from_folder( content_scenes_path=self.content_scenes_path, dataset_dir=dataset_dir, ) diff --git a/habitat/datasets/registration.py b/habitat/datasets/registration.py index a9f3add4671295a294033e6698eddb4b8c2127c8..27e509e955a54879bc8107bf850ed0310cc67b14 100644 --- a/habitat/datasets/registration.py +++ b/habitat/datasets/registration.py @@ -7,6 +7,7 @@ from habitat.core.logging import logger from habitat.core.registry import registry from habitat.datasets.eqa import _try_register_mp3d_eqa_dataset +from habitat.datasets.object_nav import _try_register_objectnavdatasetv1 from habitat.datasets.pointnav import _try_register_pointnavdatasetv1 from habitat.datasets.vln import _try_register_r2r_vln_dataset @@ -19,6 +20,7 @@ def make_dataset(id_dataset, **kwargs): return _dataset(**kwargs) +_try_register_objectnavdatasetv1() _try_register_mp3d_eqa_dataset() _try_register_pointnavdatasetv1() _try_register_r2r_vln_dataset() diff --git a/habitat/sims/habitat_simulator/habitat_simulator.py b/habitat/sims/habitat_simulator/habitat_simulator.py index ba3b820672ec759852ab149f6595e97d91ebcf59..1bf4f75c1cca1b7fff263e50d3d40fbccf469ae7 100644 --- a/habitat/sims/habitat_simulator/habitat_simulator.py +++ 
b/habitat/sims/habitat_simulator/habitat_simulator.py @@ -287,6 +287,7 @@ class HabitatSim(Simulator): def geodesic_distance(self, position_a, position_b): path = habitat_sim.MultiGoalShortestPath() path.requested_start = np.array(position_a, dtype=np.float32) + if isinstance(position_b[0], List) or isinstance( position_b[0], np.ndarray ): diff --git a/habitat/tasks/nav/nav.py b/habitat/tasks/nav/nav.py index e78e70499b9f59d7089443b0f997bc9318c0714e..49d835d2dad256d82518e6a04d04f5f5a64b2e49 100644 --- a/habitat/tasks/nav/nav.py +++ b/habitat/tasks/nav/nav.py @@ -17,6 +17,7 @@ from habitat.core.embodied_task import ( Measure, SimulatorTaskAction, ) +from habitat.core.logging import logger from habitat.core.registry import registry from habitat.core.simulator import ( AgentState, @@ -69,20 +70,6 @@ class NavigationGoal: radius: Optional[float] = None -@attr.s(auto_attribs=True, kw_only=True) -class ObjectGoal(NavigationGoal): - r"""Object goal that can be specified by object_id or position or object - category. - """ - - object_id: str = attr.ib(default=None, validator=not_none_validator) - object_name: Optional[str] = None - object_category: Optional[str] = None - room_id: Optional[str] = None - room_name: Optional[str] = None - view_points: Optional[List[AgentState]] = None - - @attr.s(auto_attribs=True, kw_only=True) class RoomGoal(NavigationGoal): r"""Room goal that can be specified by room_id or position with radius. @@ -285,7 +272,7 @@ class HeadingSensor(Sensor): heading_vector = quaternion_rotate_vector(quat, direction_vector) phi = cartesian_to_polar(-heading_vector[2], heading_vector[0])[1] - return np.array(phi) + return np.array([phi], dtype=np.float32) def get_observation( self, observations, episode, *args: Any, **kwargs: Any @@ -420,6 +407,8 @@ class SPL(Measure): ref: On Evaluation of Embodied Agents - Anderson et. 
al https://arxiv.org/pdf/1807.06757.pdf + The measure depends on Distance to Goal measure to improve computational + performance for sophisticated goal areas. """ def __init__( @@ -428,6 +417,7 @@ class SPL(Measure): self._previous_position = None self._start_end_episode_distance = None self._agent_episode_distance = None + self._episode_view_points = None self._sim = sim self._config = config @@ -436,11 +426,14 @@ class SPL(Measure): def _get_uuid(self, *args: Any, **kwargs: Any): return "spl" - def reset_metric(self, *args: Any, episode, **kwargs: Any): + def reset_metric(self, *args: Any, episode, task, **kwargs: Any): self._previous_position = self._sim.get_agent_state().position.tolist() self._start_end_episode_distance = episode.info["geodesic_distance"] self._agent_episode_distance = 0.0 - self._metric = None + task.measurements.check_measure_dependencies( + self.uuid, [DistanceToGoal.cls_uuid] + ) + self.update_metric(*args, episode=episode, task=task, **kwargs) def _euclidean_distance(self, position_a, position_b): return np.linalg.norm( @@ -448,14 +441,13 @@ class SPL(Measure): ) def update_metric( - self, *args: Any, episode, action, task: EmbodiedTask, **kwargs: Any + self, *args: Any, episode, task: EmbodiedTask, **kwargs: Any ): ep_success = 0 current_position = self._sim.get_agent_state().position.tolist() - - distance_to_target = self._sim.geodesic_distance( - current_position, episode.goals[0].position - ) + distance_to_target = task.measurements.measures[ + DistanceToGoal.cls_uuid + ].get_metric() if ( hasattr(task, "is_stop_called") @@ -530,6 +522,9 @@ class TopDownMap(Measure): self.line_thickness = int( np.round(self._map_resolution[0] * 2 / MAP_THICKNESS_SCALAR) ) + self.point_padding = 2 * int( + np.ceil(self._map_resolution[0] / MAP_THICKNESS_SCALAR) + ) super().__init__() def _get_uuid(self, *args: Any, **kwargs: Any): @@ -559,51 +554,95 @@ class TopDownMap(Measure): return top_down_map - def draw_source_and_target(self, episode): - # mark 
source point - s_x, s_y = maps.to_grid( - episode.start_position[0], - episode.start_position[2], - self._coordinate_min, - self._coordinate_max, - self._map_resolution, - ) - point_padding = 2 * int( - np.ceil(self._map_resolution[0] / MAP_THICKNESS_SCALAR) - ) - self._top_down_map[ - s_x - point_padding : s_x + point_padding + 1, - s_y - point_padding : s_y + point_padding + 1, - ] = maps.MAP_SOURCE_POINT_INDICATOR - - # mark target point + def _draw_point(self, position, point_type): t_x, t_y = maps.to_grid( - episode.goals[0].position[0], - episode.goals[0].position[2], + position[0], + position[2], self._coordinate_min, self._coordinate_max, self._map_resolution, ) self._top_down_map[ - t_x - point_padding : t_x + point_padding + 1, - t_y - point_padding : t_y + point_padding + 1, - ] = maps.MAP_TARGET_POINT_INDICATOR - - def reset_metric(self, *args: Any, episode, **kwargs: Any): - self._step_count = 0 - self._metric = None - self._top_down_map = self.get_original_map() - agent_position = self._sim.get_agent_state().position - a_x, a_y = maps.to_grid( - agent_position[0], - agent_position[2], - self._coordinate_min, - self._coordinate_max, - self._map_resolution, - ) - self._previous_xy_location = (a_y, a_x) + t_x - self.point_padding : t_x + self.point_padding + 1, + t_y - self.point_padding : t_y + self.point_padding + 1, + ] = point_type + + def _draw_goals_view_points(self, episode): + if self._config.DRAW_VIEW_POINTS: + for goal in episode.goals: + try: + if goal.view_points is not None: + for view_point in goal.view_points: + self._draw_point( + view_point.agent_state.position, + maps.MAP_VIEW_POINT_INDICATOR, + ) + except AttributeError: + pass + + def _draw_goals_positions(self, episode): + if self._config.DRAW_GOAL_POSITIONS: + + for goal in episode.goals: + try: + self._draw_point( + goal.position, maps.MAP_TARGET_POINT_INDICATOR + ) + except AttributeError: + pass + + def _draw_goals_aabb(self, episode): + if self._config.DRAW_GOAL_AABBS: + for 
goal in episode.goals: + try: + sem_scene = self._sim.semantic_annotations() + object_id = goal.object_id + assert int( + sem_scene.objects[object_id].id.split("_")[-1] + ) == int( + goal.object_id + ), f"Object_id doesn't correspond to id in semantic scene objects dictionary for episode: {episode}" + + center = sem_scene.objects[object_id].aabb.center + x_len, _, z_len = ( + sem_scene.objects[object_id].aabb.sizes / 2.0 + ) + # Nodes to draw rectangle + corners = [ + center + np.array([x, 0, z]) + for x, z in [ + (-x_len, -z_len), + (-x_len, z_len), + (x_len, z_len), + (x_len, -z_len), + (-x_len, -z_len), + ] + ] + + map_corners = [ + maps.to_grid( + p[0], + p[2], + self._coordinate_min, + self._coordinate_max, + self._map_resolution, + ) + for p in corners + ] + + maps.draw_path( + self._top_down_map, + map_corners, + maps.MAP_TARGET_BOUNDING_BOX, + self.line_thickness, + ) + except AttributeError: + pass + + def _draw_shortest_path( + self, episode: Episode, agent_position: AgentState + ): if self._config.DRAW_SHORTEST_PATH: - # draw shortest path self._shortest_path_points = self._sim.get_straight_shortest_path_points( agent_position, episode.goals[0].position ) @@ -614,7 +653,7 @@ class TopDownMap(Measure): self._coordinate_min, self._coordinate_max, self._map_resolution, - )[::-1] + ) for p in self._shortest_path_points ] maps.draw_path( @@ -624,11 +663,33 @@ class TopDownMap(Measure): self.line_thickness, ) + def reset_metric(self, *args: Any, episode, **kwargs: Any): + self._step_count = 0 + self._metric = None + self._top_down_map = self.get_original_map() + agent_position = self._sim.get_agent_state().position + a_x, a_y = maps.to_grid( + agent_position[0], + agent_position[2], + self._coordinate_min, + self._coordinate_max, + self._map_resolution, + ) + self._previous_xy_location = (a_y, a_x) + self.update_fog_of_war_mask(np.array([a_x, a_y])) - # draw source and target points last to avoid overlap - if self._config.DRAW_SOURCE_AND_TARGET: - 
self.draw_source_and_target(episode) + # draw source and target parts last to avoid overlap + self._draw_goals_view_points(episode) + self._draw_goals_aabb(episode) + self._draw_goals_positions(episode) + + self._draw_shortest_path(episode, agent_position) + + if self._config.DRAW_SOURCE: + self._draw_point( + episode.start_position, maps.MAP_SOURCE_POINT_INDICATOR + ) def _clip_map(self, _map): return _map[ @@ -723,10 +784,11 @@ class TopDownMap(Measure): @registry.register_measure class DistanceToGoal(Measure): - """The measure provides a set of metrics that illustrate agent's progress - towards the goal. + """The measure calculates a distance towards the goal. """ + cls_uuid: str = "distance_to_goal" + def __init__( self, sim: Simulator, config: Config, *args: Any, **kwargs: Any ): @@ -735,11 +797,12 @@ class DistanceToGoal(Measure): self._agent_episode_distance = None self._sim = sim self._config = config + self._episode_view_points = None super().__init__(**kwargs) def _get_uuid(self, *args: Any, **kwargs: Any): - return "distance_to_goal" + return self.cls_uuid def reset_metric(self, episode, *args: Any, **kwargs: Any): self._previous_position = self._sim.get_agent_state().position.tolist() @@ -748,18 +811,34 @@ class DistanceToGoal(Measure): ) self._agent_episode_distance = 0.0 self._metric = None + if self._config.DISTANCE_TO == "VIEW_POINTS": + self._episode_view_points = [ + view_point.agent_state.position + for goal in episode.goals + for view_point in goal.view_points + ] + self.update_metric(*args, episode=episode, **kwargs) def _euclidean_distance(self, position_a, position_b): return np.linalg.norm( np.array(position_b) - np.array(position_a), ord=2 ) - def update_metric(self, episode, action, *args: Any, **kwargs: Any): + def update_metric(self, episode, *args: Any, **kwargs: Any): current_position = self._sim.get_agent_state().position.tolist() - distance_to_target = self._sim.geodesic_distance( - current_position, episode.goals[0].position - ) + 
if self._config.DISTANCE_TO == "POINT": + distance_to_target = self._sim.geodesic_distance( + current_position, [goal.position for goal in episode.goals] + ) + elif self._config.DISTANCE_TO == "VIEW_POINTS": + distance_to_target = self._sim.geodesic_distance( + current_position, self._episode_view_points + ) + else: + logger.error( + f"Non valid DISTANCE_TO parameter was provided: {self._config.DISTANCE_TO}" + ) self._agent_episode_distance += self._euclidean_distance( current_position, self._previous_position @@ -767,13 +846,7 @@ class DistanceToGoal(Measure): self._previous_position = current_position - self._metric = { - "distance_to_target": distance_to_target, - "start_distance_to_target": self._start_end_episode_distance, - "distance_delta": self._start_end_episode_distance - - distance_to_target, - "agent_path_length": self._agent_episode_distance, - } + self._metric = distance_to_target @registry.register_task_action @@ -853,7 +926,7 @@ class TeleportAction(SimulatorTaskAction): *args: Any, position: List[float], rotation: List[float], - **kwargs: Any + **kwargs: Any, ): r"""Update ``_metric``, this method is called from ``Env`` on each ``step``. diff --git a/habitat/tasks/nav/object_nav_task.py b/habitat/tasks/nav/object_nav_task.py new file mode 100644 index 0000000000000000000000000000000000000000..87389448d4585dbdff185327a302ab87a8b39a64 --- /dev/null +++ b/habitat/tasks/nav/object_nav_task.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from typing import Any, List, Optional + +import attr +import numpy as np +from gym import spaces + +from habitat.config import Config +from habitat.core.dataset import Dataset +from habitat.core.logging import logger +from habitat.core.registry import registry +from habitat.core.simulator import AgentState, Sensor, SensorTypes +from habitat.core.utils import not_none_validator +from habitat.tasks.nav.nav import ( + NavigationEpisode, + NavigationGoal, + NavigationTask, +) + + +@attr.s(auto_attribs=True) +class ObjectViewLocation: + r"""ObjectViewLocation provides information about a position around an object goal + usually that is navigable and the object is visible with specific agent + configuration that episode's dataset was created. + that is target for + navigation. That can be specify object_id, position and object + category. An important part for metrics calculation are view points that + describe success area for the navigation. + + Args: + agent_state: navigable AgentState with a position and a rotation where + the object is visible. + iou: an intersection of a union of the object and a rectangle in the + center of view. This metric is used to evaluate how good is the object + view form current position. Higher iou means better view, iou equals + 1.0 if whole object is inside of the rectangle and no pixel inside + the rectangle belongs to anything except the object. + """ + agent_state: AgentState + iou: Optional[float] + + +@attr.s(auto_attribs=True, kw_only=True) +class ObjectGoal(NavigationGoal): + r"""Object goal provides information about an object that is target for + navigation. That can be specify object_id, position and object + category. An important part for metrics calculation are view points that + describe success area for the navigation. 
+ + Args: + object_id: id that can be used to retrieve object from the semantic + scene annotation + object_name: name of the object + object_category: object category name usually similar to scene semantic + categories + room_id: id of a room where object is located, can be used to retrieve + room from the semantic scene annotation + room_name: name of the room, where object is located + view_points: navigable positions around the object with specified + proximity of the object surface used for navigation metrics calculation. + The object is visible from these positions. + """ + + object_id: str = attr.ib(default=None, validator=not_none_validator) + object_name: Optional[str] = None + object_category: Optional[str] = None + room_id: Optional[str] = None + room_name: Optional[str] = None + view_points: Optional[List[ObjectViewLocation]] = None + + +@registry.register_sensor +class ObjectGoalSensor(Sensor): + r"""A sensor for Object Goal specification as observations which is used in + ObjectGoal Navigation. The goal is expected to be specified by object_id or + semantic category id. + For the agent in simulator the forward direction is along negative-z. + In polar coordinate format the angle returned is azimuth to the goal. + Args: + sim: a reference to the simulator for calculating task observations. + config: a config for the ObjectGoalSensor sensor. Can contain field + GOAL_SPEC that specifies which id use for goal specification, + GOAL_SPEC_MAX_VAL the maximum object_id possible used for + observation space definition. + dataset: a Object Goal navigation dataset that contains dictionaries + of categories id to text mapping. 
+ """ + + def __init__( + self, sim, config: Config, dataset: Dataset, *args: Any, **kwargs: Any + ): + self._sim = sim + self._dataset = dataset + super().__init__(config=config) + + def _get_uuid(self, *args: Any, **kwargs: Any): + return "objectgoal" + + def _get_sensor_type(self, *args: Any, **kwargs: Any): + return SensorTypes.SEMANTIC + + def _get_observation_space(self, *args: Any, **kwargs: Any): + sensor_shape = (1,) + max_value = (self.config.GOAL_SPEC_MAX_VAL - 1,) + if self.config.GOAL_SPEC == "TASK_CATEGORY_ID": + max_value = max( + self._dataset.category_to_task_category_id.values() + ) + + return spaces.Box( + low=0, high=max_value, shape=sensor_shape, dtype=np.int64 + ) + + def get_observation( + self, + observations, + *args: Any, + episode: NavigationEpisode, + **kwargs: Any, + ) -> Optional[int]: + if self.config.GOAL_SPEC == "TASK_CATEGORY_ID": + if len(episode.goals) == 0: + logger.error( + f"No goal specified for episode {episode.episode_id}." + ) + return None + if not isinstance(episode.goals[0], ObjectGoal): + logger.error( + f"First goal should be ObjectGoal, episode {episode.episode_id}." + ) + return None + category_name = episode.goals[0].object_category + return np.array( + [self._dataset.category_to_task_category_id[category_name]], + dtype=np.int64, + ) + elif self.config.GOAL_SPEC == "OBJECT_ID": + return np.array([episode.goals[0].object_name_id], dtype=np.int64) + else: + raise RuntimeError( + "Wrong GOAL_SPEC specified for ObjectGoalSensor." + ) + + +@registry.register_task(name="ObjectNav-v1") +class ObjectNavigationTask(NavigationTask): + r"""An Object Navigation Task class for a task specific methods. + Used to explicitly state a type of the task in config. 
+ """ + pass diff --git a/habitat/tasks/utils.py b/habitat/tasks/utils.py index c32d4b4f29d81eb24d71820c27146e47604c77f0..f468377d5183b220fc2d4a6e01424c35b673b9f0 100644 --- a/habitat/tasks/utils.py +++ b/habitat/tasks/utils.py @@ -64,3 +64,9 @@ def cartesian_to_polar(x, y): rho = np.sqrt(x ** 2 + y ** 2) phi = np.arctan2(y, x) return rho, phi + + +def compute_pixel_coverage(instance_seg, object_id): + cand_mask = instance_seg == object_id + score = cand_mask.sum().astype(np.float64) / cand_mask.size + return score diff --git a/habitat/utils/visualizations/maps.py b/habitat/utils/visualizations/maps.py index 0b98f026148e59dca8e38f06a8c6e1903f60edc3..d9ab319c6abdc73669449699297661a2c611fb49 100644 --- a/habitat/utils/visualizations/maps.py +++ b/habitat/utils/visualizations/maps.py @@ -37,16 +37,20 @@ MAP_BORDER_INDICATOR = 2 MAP_SOURCE_POINT_INDICATOR = 4 MAP_TARGET_POINT_INDICATOR = 6 MAP_SHORTEST_PATH_COLOR = 7 +MAP_VIEW_POINT_INDICATOR = 8 +MAP_TARGET_BOUNDING_BOX = 9 TOP_DOWN_MAP_COLORS = np.full((256, 3), 150, dtype=np.uint8) TOP_DOWN_MAP_COLORS[10:] = cv2.applyColorMap( np.arange(246, dtype=np.uint8), cv2.COLORMAP_JET ).squeeze(1)[:, ::-1] -TOP_DOWN_MAP_COLORS[MAP_INVALID_POINT] = [255, 255, 255] -TOP_DOWN_MAP_COLORS[MAP_VALID_POINT] = [150, 150, 150] -TOP_DOWN_MAP_COLORS[MAP_BORDER_INDICATOR] = [50, 50, 50] -TOP_DOWN_MAP_COLORS[MAP_SOURCE_POINT_INDICATOR] = [0, 0, 200] -TOP_DOWN_MAP_COLORS[MAP_TARGET_POINT_INDICATOR] = [200, 0, 0] -TOP_DOWN_MAP_COLORS[MAP_SHORTEST_PATH_COLOR] = [0, 200, 0] +TOP_DOWN_MAP_COLORS[MAP_INVALID_POINT] = [255, 255, 255] # White +TOP_DOWN_MAP_COLORS[MAP_VALID_POINT] = [150, 150, 150] # Light Grey +TOP_DOWN_MAP_COLORS[MAP_BORDER_INDICATOR] = [50, 50, 50] # Grey +TOP_DOWN_MAP_COLORS[MAP_SOURCE_POINT_INDICATOR] = [0, 0, 200] # Blue +TOP_DOWN_MAP_COLORS[MAP_TARGET_POINT_INDICATOR] = [200, 0, 0] # Red +TOP_DOWN_MAP_COLORS[MAP_SHORTEST_PATH_COLOR] = [0, 200, 0] # Green +TOP_DOWN_MAP_COLORS[MAP_VIEW_POINT_INDICATOR] = [245, 150, 150] # 
Light Red +TOP_DOWN_MAP_COLORS[MAP_TARGET_BOUNDING_BOX] = [0, 175, 0] # Green def draw_agent( @@ -375,4 +379,11 @@ def draw_path( thickness: thickness of the path. """ for prev_pt, next_pt in zip(path_points[:-1], path_points[1:]): - cv2.line(top_down_map, prev_pt, next_pt, color, thickness=thickness) + # Swapping x y + cv2.line( + top_down_map, + prev_pt[::-1], + next_pt[::-1], + color, + thickness=thickness, + ) diff --git a/habitat_baselines/common/base_trainer.py b/habitat_baselines/common/base_trainer.py index 3af1209d6328d370c8510d8a8b6ae283a55561e1..407afabf47144b8e89e02b92745c82ab24d05341 100644 --- a/habitat_baselines/common/base_trainer.py +++ b/habitat_baselines/common/base_trainer.py @@ -123,6 +123,7 @@ class BaseRLTrainer(BaseTrainer): """ config = self.config.clone() + config.defrost() ckpt_cmd_opts = checkpoint_config.CMD_TRAILING_OPTS eval_cmd_opts = config.CMD_TRAILING_OPTS diff --git a/habitat_baselines/common/environments.py b/habitat_baselines/common/environments.py index b7d43dd888a8f60fe0084b40058460f3d199dd66..99eeb14080567526da13228e88a165cee1fd1c81 100644 --- a/habitat_baselines/common/environments.py +++ b/habitat_baselines/common/environments.py @@ -10,7 +10,7 @@ in habitat. 
Customized environments should be registered using ``@baseline_registry.register_env(name="myEnv")` for reusability """ -from typing import Any, Dict, Optional, Type, Union +from typing import Optional, Type import habitat from habitat import Config, Dataset @@ -34,20 +34,18 @@ class NavRLEnv(habitat.RLEnv): def __init__(self, config: Config, dataset: Optional[Dataset] = None): self._rl_config = config.RL self._core_env_config = config.TASK_CONFIG + self._reward_measure_name = self._rl_config.REWARD_MEASURE + self._success_measure_name = self._rl_config.SUCCESS_MEASURE - self._previous_target_distance = None + self._previous_measure = None self._previous_action = None - self._episode_distance_covered = None - self._success_distance = self._core_env_config.TASK.SUCCESS_DISTANCE super().__init__(self._core_env_config, dataset) def reset(self): self._previous_action = None - observations = super().reset() - - self._previous_target_distance = self.habitat_env.current_episode.info[ - "geodesic_distance" + self._previous_measure = self._env.get_metrics()[ + self._reward_measure_name ] return observations @@ -64,30 +62,18 @@ class NavRLEnv(habitat.RLEnv): def get_reward(self, observations): reward = self._rl_config.SLACK_REWARD - current_target_distance = self._distance_target() - reward += self._previous_target_distance - current_target_distance - self._previous_target_distance = current_target_distance + current_measure = self._env.get_metrics()[self._reward_measure_name] + + reward += self._previous_measure - current_measure + self._previous_measure = current_measure if self._episode_success(): reward += self._rl_config.SUCCESS_REWARD return reward - def _distance_target(self): - current_position = self._env.sim.get_agent_state().position.tolist() - distance = self._env.sim.geodesic_distance( - current_position, - [goal.position for goal in self._env.current_episode.goals], - ) - return distance - def _episode_success(self): - if ( - self._env.task.is_stop_called - and 
self._distance_target() < self._success_distance - ): - return True - return False + return self._env.get_metrics()[self._success_measure_name] def get_done(self, observations): done = False diff --git a/habitat_baselines/config/default.py b/habitat_baselines/config/default.py index b4a3abbcf789678f14ad9bf1f553f86851716ade..2d3b3f068358acb2e1202ff0cfbcaf7a254d4561 100644 --- a/habitat_baselines/config/default.py +++ b/habitat_baselines/config/default.py @@ -47,6 +47,8 @@ _C.EVAL.USE_CKPT_CONFIG = True # REINFORCEMENT LEARNING (RL) ENVIRONMENT CONFIG # ----------------------------------------------------------------------------- _C.RL = CN() +_C.RL.REWARD_MEASURE = "distance_to_goal" +_C.RL.SUCCESS_MEASURE = "spl" _C.RL.SUCCESS_REWARD = 10.0 _C.RL.SLACK_REWARD = -0.01 # ----------------------------------------------------------------------------- diff --git a/habitat_baselines/config/objectnav/ddppo_objectnav.yaml b/habitat_baselines/config/objectnav/ddppo_objectnav.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e759ad89264bc52b4eda4560d80c49903decac1 --- /dev/null +++ b/habitat_baselines/config/objectnav/ddppo_objectnav.yaml @@ -0,0 +1,60 @@ +BASE_TASK_CONFIG_PATH: "configs/tasks/objectnav_mp3d.yaml" +TRAINER_NAME: "ppo" +ENV_NAME: "NavRLEnv" +SIMULATOR_GPU_ID: 0 +TORCH_GPU_ID: 1 +VIDEO_OPTION: ["disk", "tensorboard"] +TENSORBOARD_DIR: "/checkpoint/maksymets/logs/habitat_baselines/ddppo/obj_nav_mp3d" +VIDEO_DIR: "data/video_dir" +TEST_EPISODE_COUNT: 2184 +EVAL_CKPT_PATH_DIR: "data/new_checkpoints" +NUM_PROCESSES: 2 +SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR"] +CHECKPOINT_FOLDER: "data/new_checkpoints" +NUM_UPDATES: 10000 +LOG_INTERVAL: 10 +CHECKPOINT_INTERVAL: 50 + +RL: + SUCCESS_REWARD: 2.5 + PPO: + # ppo params + clip_param: 0.2 + ppo_epoch: 2 + num_mini_batch: 2 + value_loss_coef: 0.5 + entropy_coef: 0.01 + lr: 2.5e-6 + eps: 1e-5 + max_grad_norm: 0.2 + num_steps: 128 + use_gae: True + gamma: 0.99 + tau: 0.95 + use_linear_clip_decay: False 
+ use_linear_lr_decay: False + reward_window_size: 50 + + use_normalized_advantage: False + + hidden_size: 512 + + DDPPO: + sync_frac: 0.6 + # The PyTorch distributed backend to use + distrib_backend: NCCL + # Visual encoder backbone + pretrained_weights: data/ddppo-models/gibson-2plus-resnet50.pth + # Initialize with pretrained weights + pretrained: False + # Initialize just the visual encoder backbone with pretrained weights + pretrained_encoder: False + # Whether or not the visual encoder backbone will be trained. + train_encoder: True + # Whether or not to reset the critic linear layer + reset_critic: True + + # Model parameters + backbone: resnet50 + rnn_type: LSTM + num_recurrent_layers: 2 diff --git a/test/test_object_nav_task.py b/test/test_object_nav_task.py new file mode 100644 index 0000000000000000000000000000000000000000..59629fd157d97ad3d121e89e35202b1ee2760547 --- /dev/null +++ b/test/test_object_nav_task.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import time + +import numpy as np +import pytest + +import habitat +from habitat.config.default import get_config +from habitat.core.embodied_task import Episode +from habitat.core.logging import logger +from habitat.datasets import make_dataset +from habitat.datasets.object_nav.object_nav_dataset import ObjectNavDatasetV1 +from habitat.tasks.eqa.eqa import AnswerAction +from habitat.tasks.nav.nav import MoveForwardAction +from habitat.utils.test_utils import sample_non_stop_action + +CFG_TEST = "configs/test/habitat_mp3d_object_nav_test.yaml" +EPISODES_LIMIT = 6 + + +def check_json_serializaiton(dataset: habitat.Dataset): + start_time = time.time() + json_str = str(dataset.to_json()) + logger.info( + "JSON conversion finished. 
{} sec".format((time.time() - start_time)) + ) + decoded_dataset = dataset.__class__() + decoded_dataset.from_json(json_str) + assert len(decoded_dataset.episodes) > 0 + episode = decoded_dataset.episodes[0] + assert isinstance(episode, Episode) + assert ( + decoded_dataset.to_json() == json_str + ), "JSON dataset encoding/decoding isn't consistent" + + +def test_mp3d_object_nav_dataset(): + dataset_config = get_config(CFG_TEST).DATASET + if not ObjectNavDatasetV1.check_config_paths_exist(dataset_config): + pytest.skip( + "Please download Matterport3D ObjectNav Dataset to data folder." + ) + + dataset = habitat.make_dataset( + id_dataset=dataset_config.TYPE, config=dataset_config + ) + assert dataset + check_json_serializaiton(dataset) + + +def test_object_nav_task(): + config = get_config(CFG_TEST) + + if not ObjectNavDatasetV1.check_config_paths_exist(config.DATASET): + pytest.skip( + "Please download Matterport3D scene and ObjectNav Datasets to data folder." + ) + + dataset = make_dataset( + id_dataset=config.DATASET.TYPE, config=config.DATASET + ) + env = habitat.Env(config=config, dataset=dataset) + + for i in range(10): + env.reset() + while not env.episode_over: + action = env.action_space.sample() + habitat.logger.info( + f"Action : " + f"{action['action']}, " + f"args: {action['action_args']}." 
+ ) + env.step(action) + + metrics = env.get_metrics() + logger.info(metrics) + + with pytest.raises(AssertionError): + env.step({"action": MoveForwardAction.name}) diff --git a/test/test_sensors.py b/test/test_sensors.py index 6f68d4e3521064930d3e92eff0aaf971b2385b03..0e3eacb512819fd506cbe0f2e7d1afd4c5a209c2 100644 --- a/test/test_sensors.py +++ b/test/test_sensors.py @@ -78,7 +78,7 @@ def test_state_sensors(): obs = env.reset() heading = obs["heading"] - assert np.allclose(heading, random_heading) + assert np.allclose(heading, [random_heading]) assert np.allclose(obs["compass"], [0.0], atol=1e-5) assert np.allclose(obs["gps"], [0.0, 0.0], atol=1e-5) @@ -241,17 +241,18 @@ def test_pointgoal_with_gps_compass_sensor(): obs = env.step(sample_non_stop_action(env.action_space)) pointgoal = obs["pointgoal"] pointgoal_with_gps_compass = obs["pointgoal_with_gps_compass"] - comapss = obs["compass"] + compass = float(obs["compass"][0]) gps = obs["gps"] # check to see if taking non-stop actions will affect static point_goal assert np.allclose( pointgoal_with_gps_compass, quaternion_rotate_vector( quaternion.from_rotation_vector( - comapss * np.array([0, 1, 0]) + compass * np.array([0, 1, 0]) ).inverse(), pointgoal - gps, ), + atol=1e-5, ) env.close()