diff --git a/habitat_baselines/common/utils.py b/habitat_baselines/common/utils.py
index feb24bf04a5990fc93994385a691f9403c9c7051..5e650903b4055b45c820071bc1db0c7ac70fd6da 100644
--- a/habitat_baselines/common/utils.py
+++ b/habitat_baselines/common/utils.py
@@ -120,7 +120,8 @@ def generate_video(
     images: List[np.ndarray],
     episode_id: int,
     checkpoint_idx: int,
-    spl: float,
+    metric_name: str,
+    metric_value: float,
     tb_writer: TensorboardWriter,
     fps: int = 10,
 ) -> None:
@@ -132,7 +133,8 @@ def generate_video(
         images: list of images to be converted to video.
         episode_id: episode id for video naming.
         checkpoint_idx: checkpoint index for video naming.
-        spl: SPL for this episode for video naming.
+        metric_name: name of the performance metric, e.g. "spl".
+        metric_value: value of the performance metric.
         tb_writer: tensorboard writer object for uploading video.
         fps: fps for generated video.
     Returns:
@@ -141,7 +143,7 @@ def generate_video(
     if len(images) < 1:
         return
 
-    video_name = f"episode{episode_id}_ckpt{checkpoint_idx}_spl{spl:.2f}"
+    video_name = f"episode{episode_id}_ckpt{checkpoint_idx}_{metric_name}{metric_value:.2f}"
     if "disk" in video_option:
         assert video_dir is not None
         images_to_video(images, video_dir, video_name)
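
A rough sketch of the resulting naming behavior (the episode/checkpoint/metric values below are purely illustrative):

    # The video file name now embeds whichever metric key is passed in
    # (e.g. "spl" or "success") instead of hard-coding SPL.
    episode_id, checkpoint_idx = 12, 3           # hypothetical values
    metric_name, metric_value = "spl", 0.87      # hypothetical values
    video_name = (
        f"episode{episode_id}_ckpt{checkpoint_idx}"
        f"_{metric_name}{metric_value:.2f}"
    )
    print(video_name)  # episode12_ckpt3_spl0.87
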
diff --git a/habitat_baselines/rl/ppo/ppo_trainer.py b/habitat_baselines/rl/ppo/ppo_trainer.py
index e508cf0d3da467f9d98ba8de47900ce88298d417..5eba415e9e9a24acae456160a19ce00659bb0a03 100644
--- a/habitat_baselines/rl/ppo/ppo_trainer.py
+++ b/habitat_baselines/rl/ppo/ppo_trainer.py
@@ -379,6 +379,15 @@ class PPOTrainer(BaseRLTrainer):
         self.agent.load_state_dict(ckpt_dict["state_dict"])
         self.actor_critic = self.agent.actor_critic
 
+        # get name of performance metric, e.g. "spl"
+        metric_name = self.config.TASK_CONFIG.TASK.MEASUREMENTS[0]
+        metric_cfg = getattr(self.config.TASK_CONFIG.TASK, metric_name)
+        measure_type = baseline_registry.get_measure(metric_cfg.TYPE)
+        assert measure_type is not None, "invalid measurement type {}".format(
+            metric_cfg.TYPE
+        )
+        self.metric_uuid = measure_type(None, None)._get_uuid()
+
         observations = self.envs.reset()
         batch = batch_obs(observations)
         for sensor in batch:
@@ -457,8 +466,12 @@ class PPOTrainer(BaseRLTrainer):
                 # episode ended
                 if not_done_masks[i].item() == 0:
                     episode_stats = dict()
-                    episode_stats["spl"] = infos[i]["spl"]
-                    episode_stats["success"] = int(infos[i]["spl"] > 0)
+                    episode_stats[self.metric_uuid] = infos[i][
+                        self.metric_uuid
+                    ]
+                    episode_stats["success"] = int(
+                        infos[i][self.metric_uuid] > 0
+                    )
                     episode_stats["reward"] = current_episode_reward[i].item()
                     current_episode_reward[i] = 0
                     # use scene_id + episode_id as unique id for storing stats
@@ -476,7 +489,8 @@ class PPOTrainer(BaseRLTrainer):
                             images=rgb_frames[i],
                             episode_id=current_episodes[i].episode_id,
                             checkpoint_idx=checkpoint_index,
-                            spl=infos[i]["spl"],
+                            metric_name=self.metric_uuid,
+                            metric_value=infos[i][self.metric_uuid],
                             tb_writer=writer,
                         )
 
@@ -516,12 +530,14 @@ class PPOTrainer(BaseRLTrainer):
         num_episodes = len(stats_episodes)
 
         episode_reward_mean = aggregated_stats["reward"] / num_episodes
-        episode_spl_mean = aggregated_stats["spl"] / num_episodes
+        episode_metric_mean = aggregated_stats[self.metric_uuid] / num_episodes
         episode_success_mean = aggregated_stats["success"] / num_episodes
 
         logger.info(f"Average episode reward: {episode_reward_mean:.6f}")
         logger.info(f"Average episode success: {episode_success_mean:.6f}")
-        logger.info(f"Average episode SPL: {episode_spl_mean:.6f}")
+        logger.info(
+            f"Average episode {self.metric_uuid}: {episode_metric_mean:.6f}"
+        )
 
         writer.add_scalars(
             "eval_reward",
@@ -529,7 +545,9 @@ class PPOTrainer(BaseRLTrainer):
             checkpoint_index,
         )
         writer.add_scalars(
-            "eval_SPL", {"average SPL": episode_spl_mean}, checkpoint_index
+            f"eval_{self.metric_uuid}",
+            {f"average {self.metric_uuid}": episode_metric_mean},
+            checkpoint_index,
         )
         writer.add_scalars(
             "eval_success",