diff --git a/configs/tasks/obj_nav_mp3d.yaml b/configs/tasks/obj_nav_mp3d.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bdf61f0288ed67c516a28e2f7334a93056c9b63d
--- /dev/null
+++ b/configs/tasks/obj_nav_mp3d.yaml
@@ -0,0 +1,47 @@
+ENVIRONMENT:
+  MAX_EPISODE_STEPS: 500
+SIMULATOR:
+  TURN_ANGLE: 30
+  TILT_ANGLE: 30
+  AGENT_0:
+    SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR']
+    HEIGHT: 0.88
+    RADIUS: 0.2
+  HABITAT_SIM_V0:
+    GPU_DEVICE_ID: 0
+  SEMANTIC_SENSOR:
+    WIDTH: 640
+    HEIGHT: 480
+    HFOV: 79
+    POSITION: [0, 0.88, 0]
+  RGB_SENSOR:
+    WIDTH: 640
+    HEIGHT: 480
+    HFOV: 79
+    POSITION: [0, 0.88, 0]
+  DEPTH_SENSOR:
+    WIDTH: 640
+    HEIGHT: 480
+    HFOV: 79
+    MIN_DEPTH: 0.5
+    MAX_DEPTH: 5.0
+    POSITION: [0, 0.88, 0]
+TASK:
+  TYPE: ObjectNav-v1
+  SUCCESS_DISTANCE: 0.1
+
+  SENSORS: ['OBJECTGOAL_SENSOR']
+  GOAL_SENSOR_UUID: objectgoal
+
+  MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL']
+  SPL:
+    TYPE: SPL
+    DISTANCE_TO: VIEW_POINTS
+    SUCCESS_DISTANCE: 0.2
+
+DATASET:
+  TYPE: ObjectNav-v1
+  SPLIT: val
+  CONTENT_SCENES: []
+  DATA_PATH: "data/datasets/objectnav/mp3d/v1/{split}/{split}.json.gz"
+  SCENES_DIR: "data/scene_datasets/"
diff --git a/configs/tasks/objectnav_mp3d.yaml b/configs/tasks/objectnav_mp3d.yaml
index 1289ceebb7b61553139cf5c2bd9c9b165f94abb7..8811ba838a4d7fc8af95418b250041f0f6700332 100644
--- a/configs/tasks/objectnav_mp3d.yaml
+++ b/configs/tasks/objectnav_mp3d.yaml
@@ -3,6 +3,7 @@ ENVIRONMENT:
 SIMULATOR:
   TURN_ANGLE: 30
   TILT_ANGLE: 30
+  ACTION_SPACE_CONFIG: "v1"
   AGENT_0:
     SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR']
     HEIGHT: 0.88
@@ -28,16 +29,19 @@ SIMULATOR:
     POSITION: [0, 0.88, 0]
 TASK:
   TYPE: ObjectNav-v1
+  POSSIBLE_ACTIONS: ["STOP", "MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT", "LOOK_UP", "LOOK_DOWN"]
   SUCCESS_DISTANCE: 0.1
 
-  SENSORS: ['OBJECTGOAL_SENSOR']
+  SENSORS: ['OBJECTGOAL_SENSOR', 'COMPASS_SENSOR', 'GPS_SENSOR']
   GOAL_SENSOR_UUID: objectgoal
 
-  MEASUREMENTS: ['SPL']
+  MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL']
   SPL:
     TYPE: SPL
     DISTANCE_TO: VIEW_POINTS
     SUCCESS_DISTANCE: 0.2
+  DISTANCE_TO_GOAL:
+    DISTANCE_TO: VIEW_POINTS
 
 DATASET:
   TYPE: ObjectNav-v1
diff --git a/configs/tasks/pointnav.yaml b/configs/tasks/pointnav.yaml
index bcf81eaec4bfe237c7aa0086d029f6a4655d6c76..0f60544a723184a5a3f8b2b5f5eb2092db6758a1 100644
--- a/configs/tasks/pointnav.yaml
+++ b/configs/tasks/pointnav.yaml
@@ -21,7 +21,7 @@ TASK:
     DIMENSIONALITY: 2
   GOAL_SENSOR_UUID: pointgoal_with_gps_compass
 
-  MEASUREMENTS: ['SPL']
+  MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL']
   SPL:
     TYPE: SPL
     SUCCESS_DISTANCE: 0.2
diff --git a/configs/tasks/pointnav_gibson.yaml b/configs/tasks/pointnav_gibson.yaml
index 52d5ae7082f61a44056cbc42fc57530c788f2fba..729c64c905d972c730761e1903b696096c654d33 100644
--- a/configs/tasks/pointnav_gibson.yaml
+++ b/configs/tasks/pointnav_gibson.yaml
@@ -21,7 +21,7 @@ TASK:
     DIMENSIONALITY: 2
   GOAL_SENSOR_UUID: pointgoal_with_gps_compass
 
-  MEASUREMENTS: ['SPL']
+  MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL']
   SPL:
     TYPE: SPL
     SUCCESS_DISTANCE: 0.2
diff --git a/configs/tasks/pointnav_mp3d.yaml b/configs/tasks/pointnav_mp3d.yaml
index 341cac7e6d1cedb7bba4c089f3f7a330993cf9a0..1c53200beeb88ba0f5389f71c0587fe14b80b62d 100644
--- a/configs/tasks/pointnav_mp3d.yaml
+++ b/configs/tasks/pointnav_mp3d.yaml
@@ -21,7 +21,7 @@ TASK:
     DIMENSIONALITY: 2
   GOAL_SENSOR_UUID: pointgoal_with_gps_compass
 
-  MEASUREMENTS: ['SPL']
+  MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL']
   SPL:
     TYPE: SPL
     SUCCESS_DISTANCE: 0.2
diff --git a/configs/tasks/pointnav_rgbd.yaml b/configs/tasks/pointnav_rgbd.yaml
index e1660792c4cf41760081209397c26c0adf0c195a..5c74f537ddafb47edf2ec6bd04bfaf685c80d8f9 100644
--- a/configs/tasks/pointnav_rgbd.yaml
+++ b/configs/tasks/pointnav_rgbd.yaml
@@ -21,7 +21,7 @@ TASK:
     DIMENSIONALITY: 2
   GOAL_SENSOR_UUID: pointgoal_with_gps_compass
 
-  MEASUREMENTS: ['SPL']
+  MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL']
   SPL:
     TYPE: SPL
     SUCCESS_DISTANCE: 0.2
diff --git a/configs/tasks/vln_r2r.yaml b/configs/tasks/vln_r2r.yaml
index 41873216395a8109a5d091770c2caf883bd753f4..bdb4bbe67343257293be3ce3eeceaa8af961a930 100644
--- a/configs/tasks/vln_r2r.yaml
+++ b/configs/tasks/vln_r2r.yaml
@@ -21,7 +21,7 @@ TASK:
   SENSORS: ['INSTRUCTION_SENSOR']
   INSTRUCTION_SENSOR_UUID: instruction
   POSSIBLE_ACTIONS: ['STOP', 'MOVE_FORWARD', 'TURN_LEFT', 'TURN_RIGHT']
-  MEASUREMENTS: ['SPL']
+  MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL']
   SPL:
     TYPE: SPL
     SUCCESS_DISTANCE: 3.0
diff --git a/configs/test/habitat_all_sensors_test.yaml b/configs/test/habitat_all_sensors_test.yaml
index d840f48636412636c4b4af25affe78dc1fe455ed..a603475b7f421ecb4a2f0d4c3817b29d2ef5c049 100644
--- a/configs/test/habitat_all_sensors_test.yaml
+++ b/configs/test/habitat_all_sensors_test.yaml
@@ -25,7 +25,7 @@ TASK:
     DIMENSIONALITY: 2
   GOAL_SENSOR_UUID: pointgoal_with_gps_compass
 
-  MEASUREMENTS: ['SPL']
+  MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL']
   SPL:
     TYPE: SPL
     SUCCESS_DISTANCE: 0.2
diff --git a/configs/test/habitat_mp3d_object_nav_test.yaml b/configs/test/habitat_mp3d_object_nav_test.yaml
index 88a5478063003425c3fa8bd1818559fe3ae44af6..163f1a58bcc6a3422c373aa4d90fe4737a7c2957 100644
--- a/configs/test/habitat_mp3d_object_nav_test.yaml
+++ b/configs/test/habitat_mp3d_object_nav_test.yaml
@@ -1,8 +1,9 @@
 ENVIRONMENT:
-  MAX_EPISODE_STEPS: 750
+  MAX_EPISODE_STEPS: 500
 SIMULATOR:
   TURN_ANGLE: 30
   TILT_ANGLE: 30
+  ACTION_SPACE_CONFIG: "v1"
   AGENT_0:
     SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR']
     HEIGHT: 0.88
@@ -26,23 +27,25 @@ SIMULATOR:
     MIN_DEPTH: 0.5
     MAX_DEPTH: 5.0
     POSITION: [0, 0.88, 0]
-
 TASK:
   TYPE: ObjectNav-v1
+  POSSIBLE_ACTIONS: ["STOP", "MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT", "LOOK_UP", "LOOK_DOWN"]
   SUCCESS_DISTANCE: 0.1
 
-  SENSORS: ['OBJECTGOAL_SENSOR']
+  SENSORS: ['OBJECTGOAL_SENSOR', 'COMPASS_SENSOR', 'GPS_SENSOR']
   GOAL_SENSOR_UUID: objectgoal
 
-  MEASUREMENTS: ['SPL']
+  MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL']
   SPL:
     TYPE: SPL
     DISTANCE_TO: VIEW_POINTS
     SUCCESS_DISTANCE: 0.2
+  DISTANCE_TO_GOAL:
+    DISTANCE_TO: VIEW_POINTS
 
 DATASET:
   TYPE: ObjectNav-v1
   SPLIT: mini_val
   CONTENT_SCENES: []
-  DATA_PATH: "data/datasets/objectnav/mp3d/v1/{split}/{split}.json.gz"
+  DATA_PATH: "data/datasets/objectnav/mp3d/v0/{split}/{split}.json.gz"
   SCENES_DIR: "data/scene_datasets/"
diff --git a/configs/test/habitat_r2r_vln_test.yaml b/configs/test/habitat_r2r_vln_test.yaml
index 7e6fd8511f22274f76c4ab3efbd7f8f7ada588e7..e202568a4bed01f20f93fd0119ecd6b698d7c959 100644
--- a/configs/test/habitat_r2r_vln_test.yaml
+++ b/configs/test/habitat_r2r_vln_test.yaml
@@ -21,7 +21,7 @@ TASK:
     DIMENSIONALITY: 2
   GOAL_SENSOR_UUID: pointgoal_with_gps_compass
   INSTRUCTION_SENSOR_UUID: instruction
-  MEASUREMENTS: ['SPL']
+  MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SPL']
   SPL:
     TYPE: SPL
     SUCCESS_DISTANCE: 3.0
diff --git a/habitat/config/default.py b/habitat/config/default.py
index cd4a448b579c5feef9aa5afbe9430915cc83fe11..1182a03e82fdc6c618d820f50a04fd326ffeb8fe 100644
--- a/habitat/config/default.py
+++ b/habitat/config/default.py
@@ -128,7 +128,6 @@ _C.TASK.PROXIMITY_SENSOR.MAX_DETECTION_RADIUS = 2.0
 _C.TASK.SPL = CN()
 _C.TASK.SPL.TYPE = "SPL"
 _C.TASK.SPL.SUCCESS_DISTANCE = 0.2
-_C.TASK.SPL.DISTANCE_TO = "POINT"
 # -----------------------------------------------------------------------------
 # TopDownMap MEASUREMENT
 # -----------------------------------------------------------------------------
@@ -185,6 +184,7 @@ _C.TASK.INSTRUCTION_SENSOR_UUID = "instruction"
 # -----------------------------------------------------------------------------
 _C.TASK.DISTANCE_TO_GOAL = CN()
 _C.TASK.DISTANCE_TO_GOAL.TYPE = "DistanceToGoal"
+_C.TASK.DISTANCE_TO_GOAL.DISTANCE_TO = "POINT"
 # -----------------------------------------------------------------------------
 # # ANSWER_ACCURACY MEASUREMENT
 # -----------------------------------------------------------------------------
diff --git a/habitat/core/embodied_task.py b/habitat/core/embodied_task.py
index aafc7d6a760df5aeb33e02fddc0a6d01f92522db..7a3813c022b354e674dd69714feee3d3789a97e4 100644
--- a/habitat/core/embodied_task.py
+++ b/habitat/core/embodied_task.py
@@ -8,7 +8,7 @@ r"""Implements tasks and measurements needed for training and benchmarking of
 """
 
 from collections import OrderedDict
-from typing import Any, Dict, Iterable, Optional, Type, Union
+from typing import Any, Dict, Iterable, List, Optional, Type, Union
 
 import numpy as np
 
@@ -175,6 +175,31 @@ class Measurements:
         """
         return Metrics(self.measures)
 
+    def _get_measure_index(self, measure_name):
+        return list(self.measures.keys()).index(measure_name)
+
+    def check_measure_dependencies(
+        self, measure_name: str, dependencies: List[str]
+    ):
+        r"""Checks that dependency measures are enabled and updated before
+        the given measure.
+        :param measure_name: name of the measure that has dependencies.
+        :param dependencies: list of measure names that are required by
+            the measure.
+        """
+        measure_index = self._get_measure_index(measure_name)
+        for dependency_measure in dependencies:
+            assert (
+                dependency_measure in self.measures
+            ), f"""{measure_name} measure requires {dependency_measure}
+                listed in the measures list in the config."""
+
+        for dependency_measure in dependencies:
+            assert measure_index > self._get_measure_index(
+                dependency_measure
+            ), f"""{measure_name} measure must be listed after {dependency_measure}
+                in the measures list in the config."""
+
 
 class EmbodiedTask:
     r"""Base class for embodied tasks. ``EmbodiedTask`` holds definition of
""" def __init__( @@ -424,17 +431,14 @@ class SPL(Measure): def _get_uuid(self, *args: Any, **kwargs: Any): return "spl" - def reset_metric(self, *args: Any, episode, **kwargs: Any): + def reset_metric(self, *args: Any, episode, task, **kwargs: Any): self._previous_position = self._sim.get_agent_state().position.tolist() self._start_end_episode_distance = episode.info["geodesic_distance"] self._agent_episode_distance = 0.0 - self._metric = None - if self._config.DISTANCE_TO == "VIEW_POINTS": - self._episode_view_points = [ - view_point.agent_state.position - for goal in episode.goals - for view_point in goal.view_points - ] + task.measurements.check_measure_dependencies( + self.uuid, [DistanceToGoal.cls_uuid] + ) + self.update_metric(*args, episode=episode, task=task, **kwargs) def _euclidean_distance(self, position_a, position_b): return np.linalg.norm( @@ -442,23 +446,13 @@ class SPL(Measure): ) def update_metric( - self, *args: Any, episode, action, task: EmbodiedTask, **kwargs: Any + self, *args: Any, episode, task: EmbodiedTask, **kwargs: Any ): ep_success = 0 current_position = self._sim.get_agent_state().position.tolist() - - if self._config.DISTANCE_TO == "POINT": - distance_to_target = self._sim.geodesic_distance( - current_position, [goal.position for goal in episode.goals] - ) - elif self._config.DISTANCE_TO == "VIEW_POINTS": - distance_to_target = self._sim.geodesic_distance( - current_position, self._episode_view_points - ) - else: - logger.error( - f"Non valid DISTANCE_TO parameter was provided: {self._config.DISTANCE_TO}" - ) + distance_to_target = task.measurements.measures[ + DistanceToGoal.cls_uuid + ].get_metric() if ( hasattr(task, "is_stop_called") @@ -795,10 +789,11 @@ class TopDownMap(Measure): @registry.register_measure class DistanceToGoal(Measure): - """The measure provides a set of metrics that illustrate agent's progress - towards the goal. + """The measure calculates a distance towards the goal. 
""" + cls_uuid: str = "distance_to_goal" + def __init__( self, sim: Simulator, config: Config, *args: Any, **kwargs: Any ): @@ -807,11 +802,12 @@ class DistanceToGoal(Measure): self._agent_episode_distance = None self._sim = sim self._config = config + self._episode_view_points = None super().__init__(**kwargs) def _get_uuid(self, *args: Any, **kwargs: Any): - return "distance_to_goal" + return self.cls_uuid def reset_metric(self, episode, *args: Any, **kwargs: Any): self._previous_position = self._sim.get_agent_state().position.tolist() @@ -820,18 +816,34 @@ class DistanceToGoal(Measure): ) self._agent_episode_distance = 0.0 self._metric = None + if self._config.DISTANCE_TO == "VIEW_POINTS": + self._episode_view_points = [ + view_point.agent_state.position + for goal in episode.goals + for view_point in goal.view_points + ] + self.update_metric(*args, episode=episode, **kwargs) def _euclidean_distance(self, position_a, position_b): return np.linalg.norm( np.array(position_b) - np.array(position_a), ord=2 ) - def update_metric(self, episode, action, *args: Any, **kwargs: Any): + def update_metric(self, episode, *args: Any, **kwargs: Any): current_position = self._sim.get_agent_state().position.tolist() - distance_to_target = self._sim.geodesic_distance( - current_position, episode.goals[0].position - ) + if self._config.DISTANCE_TO == "POINT": + distance_to_target = self._sim.geodesic_distance( + current_position, [goal.position for goal in episode.goals] + ) + elif self._config.DISTANCE_TO == "VIEW_POINTS": + distance_to_target = self._sim.geodesic_distance( + current_position, self._episode_view_points + ) + else: + logger.error( + f"Non valid DISTANCE_TO parameter was provided: {self._config.DISTANCE_TO}" + ) self._agent_episode_distance += self._euclidean_distance( current_position, self._previous_position @@ -839,13 +851,7 @@ class DistanceToGoal(Measure): self._previous_position = current_position - self._metric = { - "distance_to_target": distance_to_target, - "start_distance_to_target": self._start_end_episode_distance, - "distance_delta": self._start_end_episode_distance - - distance_to_target, - "agent_path_length": self._agent_episode_distance, - } + self._metric = distance_to_target @registry.register_task_action diff --git a/habitat/tasks/nav/object_nav_task.py b/habitat/tasks/nav/object_nav_task.py index e8db317419214727a076cb2347d0dfc8ec6c6987..87389448d4585dbdff185327a302ab87a8b39a64 100644 --- a/habitat/tasks/nav/object_nav_task.py +++ b/habitat/tasks/nav/object_nav_task.py @@ -109,7 +109,9 @@ class ObjectGoalSensor(Sensor): sensor_shape = (1,) max_value = (self.config.GOAL_SPEC_MAX_VAL - 1,) if self.config.GOAL_SPEC == "TASK_CATEGORY_ID": - max_value = len(self._dataset.category_to_task_category_id) + max_value = max( + self._dataset.category_to_task_category_id.values() + ) return spaces.Box( low=0, high=max_value, shape=sensor_shape, dtype=np.int64 @@ -134,7 +136,10 @@ class ObjectGoalSensor(Sensor): ) return None category_name = episode.goals[0].object_category - return self._dataset.category_to_task_category_id[category_name] + return np.array( + [self._dataset.category_to_task_category_id[category_name]], + dtype=np.int64, + ) elif self.config.GOAL_SPEC == "OBJECT_ID": return np.array([episode.goals[0].object_name_id], dtype=np.int64) else: diff --git a/habitat_baselines/common/environments.py b/habitat_baselines/common/environments.py index b7d43dd888a8f60fe0084b40058460f3d199dd66..99eeb14080567526da13228e88a165cee1fd1c81 100644 --- 
a/habitat_baselines/common/environments.py +++ b/habitat_baselines/common/environments.py @@ -10,7 +10,7 @@ in habitat. Customized environments should be registered using ``@baseline_registry.register_env(name="myEnv")` for reusability """ -from typing import Any, Dict, Optional, Type, Union +from typing import Optional, Type import habitat from habitat import Config, Dataset @@ -34,20 +34,18 @@ class NavRLEnv(habitat.RLEnv): def __init__(self, config: Config, dataset: Optional[Dataset] = None): self._rl_config = config.RL self._core_env_config = config.TASK_CONFIG + self._reward_measure_name = self._rl_config.REWARD_MEASURE + self._success_measure_name = self._rl_config.SUCCESS_MEASURE - self._previous_target_distance = None + self._previous_measure = None self._previous_action = None - self._episode_distance_covered = None - self._success_distance = self._core_env_config.TASK.SUCCESS_DISTANCE super().__init__(self._core_env_config, dataset) def reset(self): self._previous_action = None - observations = super().reset() - - self._previous_target_distance = self.habitat_env.current_episode.info[ - "geodesic_distance" + self._previous_measure = self._env.get_metrics()[ + self._reward_measure_name ] return observations @@ -64,30 +62,18 @@ class NavRLEnv(habitat.RLEnv): def get_reward(self, observations): reward = self._rl_config.SLACK_REWARD - current_target_distance = self._distance_target() - reward += self._previous_target_distance - current_target_distance - self._previous_target_distance = current_target_distance + current_measure = self._env.get_metrics()[self._reward_measure_name] + + reward += self._previous_measure - current_measure + self._previous_measure = current_measure if self._episode_success(): reward += self._rl_config.SUCCESS_REWARD return reward - def _distance_target(self): - current_position = self._env.sim.get_agent_state().position.tolist() - distance = self._env.sim.geodesic_distance( - current_position, - [goal.position for goal in self._env.current_episode.goals], - ) - return distance - def _episode_success(self): - if ( - self._env.task.is_stop_called - and self._distance_target() < self._success_distance - ): - return True - return False + return self._env.get_metrics()[self._success_measure_name] def get_done(self, observations): done = False diff --git a/habitat_baselines/config/default.py b/habitat_baselines/config/default.py index b4a3abbcf789678f14ad9bf1f553f86851716ade..2d3b3f068358acb2e1202ff0cfbcaf7a254d4561 100644 --- a/habitat_baselines/config/default.py +++ b/habitat_baselines/config/default.py @@ -47,6 +47,8 @@ _C.EVAL.USE_CKPT_CONFIG = True # REINFORCEMENT LEARNING (RL) ENVIRONMENT CONFIG # ----------------------------------------------------------------------------- _C.RL = CN() +_C.RL.REWARD_MEASURE = "distance_to_goal" +_C.RL.SUCCESS_MEASURE = "spl" _C.RL.SUCCESS_REWARD = 10.0 _C.RL.SLACK_REWARD = -0.01 # ----------------------------------------------------------------------------- diff --git a/habitat_baselines/config/objectnav/ddppo_objectnav.yaml b/habitat_baselines/config/objectnav/ddppo_objectnav.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9d30f2245e0df02425af9cf873865f8390861e2 --- /dev/null +++ b/habitat_baselines/config/objectnav/ddppo_objectnav.yaml @@ -0,0 +1,60 @@ +BASE_TASK_CONFIG_PATH: "configs/tasks/objectnav_mp3d.yaml" +TRAINER_NAME: "ppo" +ENV_NAME: "NavRLEnv" +SIMULATOR_GPU_ID: 0 +TORCH_GPU_ID: 1 +VIDEO_OPTION: ["disk", "tensorboard"] +TENSORBOARD_DIR: 
"/checkpoint/maksymets/logs/habitat_baselines/ddppo/obj_nav_mp3d" +VIDEO_DIR: "data/video_dir" +TEST_EPISODE_COUNT: 2 +EVAL_CKPT_PATH_DIR: "data/new_checkpoints" +NUM_PROCESSES: 2 +SENSORS: ["DEPTH_SENSOR"] +CHECKPOINT_FOLDER: "data/new_checkpoints" +NUM_UPDATES: 10000 +LOG_INTERVAL: 10 +CHECKPOINT_INTERVAL: 50 + +RL: + SUCCESS_REWARD: 2.5 + PPO: + # ppo params + clip_param: 0.2 + ppo_epoch: 2 + num_mini_batch: 2 + value_loss_coef: 0.5 + entropy_coef: 0.01 + lr: 2.5e-6 + eps: 1e-5 + max_grad_norm: 0.2 + num_steps: 128 + use_gae: True + gamma: 0.99 + tau: 0.95 + use_linear_clip_decay: False + use_linear_lr_decay: False + reward_window_size: 50 + + use_normalized_advantage: False + + hidden_size: 512 + + DDPPO: + sync_frac: 0.6 + # The PyTorch distributed backend to use + distrib_backend: NCCL + # Visual encoder backbone + pretrained_weights: data/ddppo-models/gibson-2plus-resnet50.pth + # Initialize with pretrained weights + pretrained: False + # Initialize just the visual encoder backbone with pretrained weights + pretrained_encoder: False + # Whether or not the visual encoder backbone will be trained. + train_encoder: True + # Whether or not to reset the critic linear layer + reset_critic: True + + # Model parameters + backbone: resnet50 + rnn_type: LSTM + num_recurrent_layers: 2