diff --git a/.circleci/config.yml b/.circleci/config.yml
index 22447be442681c57313ea1c8cbff01fa4c876064..ad0e46f2e8fc978921994cc6204badf0fe1818b9 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -87,7 +87,7 @@ jobs:
               chmod +x ~/miniconda.sh
               ~/miniconda.sh -b -p $HOME/miniconda
               rm ~/miniconda.sh
-              export PATH=$HOME/miniconda/bin:$PATH
+              export PATH=$HOME/miniconda/bin:/usr/local/cuda/bin:$PATH
               conda create -y -n habitat python=3.6
               . activate habitat
               conda install -q -y -c conda-forge ninja ccache numpy pytest pytest-mock
@@ -99,7 +99,7 @@ jobs:
           command: |
             if [ ! -f ~/miniconda/pytorch_installed ]
             then
-              export PATH=$HOME/miniconda/bin:$PATH
+              export PATH=$HOME/miniconda/bin:/usr/local/cuda/bin:$PATH
               . activate habitat;
               conda install -c conda-forge opencv -y
               conda install -y pytorch torchvision cudatoolkit=10.0 -c pytorch
@@ -116,7 +116,7 @@ jobs:
       - run:
           name: CCache initialization
           command: |
-            export PATH=$HOME/miniconda/bin:$PATH
+            export PATH=$HOME/miniconda/bin:/usr/local/cuda/bin:$PATH
             . activate habitat;
             ccache --show-stats
             ccache --zero-stats
@@ -126,7 +126,7 @@ jobs:
           no_output_timeout: 20m
           command: |
             while [ ! -f ./cuda_installed ]; do sleep 2; done # wait for CUDA
-            export PATH=$HOME/miniconda/bin:$PATH
+            export PATH=$HOME/miniconda/bin:/usr/local/cuda/bin:$PATH
             . activate habitat;
             if [ ! -d ./habitat-sim ]
             then
@@ -139,7 +139,7 @@ jobs:
           name: Ccache stats
           when: always
           command: |
-            export PATH=$HOME/miniconda/bin:$PATH
+            export PATH=$HOME/miniconda/bin:/usr/local/cuda/bin:$PATH
             . activate habitat;
             ccache --show-stats
       - run:
@@ -156,7 +156,7 @@ jobs:
           name: Run sim benchmark
           command: |
             while [ ! -f ./cuda_installed ]; do sleep 2; done # wait for CUDA
-            export PATH=$HOME/miniconda/bin:$PATH
+            export PATH=$HOME/miniconda/bin:/usr/local/cuda/bin:$PATH
             . activate habitat; cd habitat-sim
             python examples/example.py --scene data/scene_datasets/habitat-test-scenes/van-gogh-room.glb --silent --test_fps_regression $FPS_THRESHOLD
       - save_cache:
@@ -173,7 +173,7 @@ jobs:
           name: Install api
           no_output_timeout: 20m
           command: |
-            export PATH=$HOME/miniconda/bin:$PATH
+            export PATH=$HOME/miniconda/bin:/usr/local/cuda/bin:$PATH
             . activate habitat; cd habitat-api
             while [ ! -f ~/miniconda/pytorch_installed ]; do sleep 2; done # wait for Pytorch
             ln -s ../habitat-sim/data data
@@ -187,7 +187,7 @@ jobs:
       - run:
           name: Run api tests
           command: |
-            export PATH=$HOME/miniconda/bin:$PATH
+            export PATH=$HOME/miniconda/bin:/usr/local/cuda/bin:$PATH
             . activate habitat; cd habitat-api
             python setup.py develop --all
             python setup.py test
diff --git a/habitat_baselines/agents/ppo_agents.py b/habitat_baselines/agents/ppo_agents.py
index d4bc64666a425b59d66d7c62be99b7ea5b7b3627..a4d39079bec1b07aba163ed1f7ea3f1b63b8664b 100644
--- a/habitat_baselines/agents/ppo_agents.py
+++ b/habitat_baselines/agents/ppo_agents.py
@@ -121,7 +121,12 @@ class PPOAgent(Agent):
             batch[sensor] = batch[sensor].to(self.device)
 
         with torch.no_grad():
-            _, actions, _, self.test_recurrent_hidden_states = self.actor_critic.act(
+            (
+                _,
+                actions,
+                _,
+                self.test_recurrent_hidden_states,
+            ) = self.actor_critic.act(
                 batch,
                 self.test_recurrent_hidden_states,
                 self.prev_actions,
diff --git a/habitat_baselines/rl/ppo/ppo_trainer.py b/habitat_baselines/rl/ppo/ppo_trainer.py
index 79dbfe6bb866c3c8f7b698a5467f274e3866ed92..c9bf606de11033e5a8f4bd0516240d9fb57ca2b3 100644
--- a/habitat_baselines/rl/ppo/ppo_trainer.py
+++ b/habitat_baselines/rl/ppo/ppo_trainer.py
@@ -272,7 +272,11 @@ class PPOTrainer(BaseRLTrainer):
                     )
 
                 for step in range(ppo_cfg.num_steps):
-                    delta_pth_time, delta_env_time, delta_steps = self._collect_rollout_step(
+                    (
+                        delta_pth_time,
+                        delta_env_time,
+                        delta_steps,
+                    ) = self._collect_rollout_step(
                         rollouts,
                         current_episode_reward,
                         episode_rewards,
@@ -282,9 +286,12 @@ class PPOTrainer(BaseRLTrainer):
                     env_time += delta_env_time
                     count_steps += delta_steps
 
-                delta_pth_time, value_loss, action_loss, dist_entropy = self._update_agent(
-                    ppo_cfg, rollouts
-                )
+                (
+                    delta_pth_time,
+                    value_loss,
+                    action_loss,
+                    dist_entropy,
+                ) = self._update_agent(ppo_cfg, rollouts)
                 pth_time += delta_pth_time
 
                 window_episode_reward.append(episode_rewards.clone())
@@ -442,7 +449,12 @@ class PPOTrainer(BaseRLTrainer):
             current_episodes = self.envs.current_episodes()
 
             with torch.no_grad():
-                _, actions, _, test_recurrent_hidden_states = self.actor_critic.act(
+                (
+                    _,
+                    actions,
+                    _,
+                    test_recurrent_hidden_states,
+                ) = self.actor_critic.act(
                     batch,
                     test_recurrent_hidden_states,
                     prev_actions,