Updated dataset pre-processing to include JRDB Challenge Dataset.
PiperOrigin-RevId: 579608449
tsal-intrinsic authored and HST Authors committed Nov 5, 2023
1 parent bf5a824 commit 7825e05
Showing 11 changed files with 419 additions and 48 deletions.
17 changes: 17 additions & 0 deletions README.md
@@ -1,3 +1,5 @@
:trophy: Winner of the [2023 JRDB Trajectory Prediction Challenge](https://jrdb.erc.monash.edu/leaderboards/trajectory)

# Human Scene Transformer

The (Human) Scene Transformer architecture (as described [here](https://arxiv.org/pdf/2309.17209.pdf) and [here](https://arxiv.org/pdf/2106.08417.pdf)) is a general and extensible trajectory prediction framework which treats trajectory prediction as a sequence-to-sequence problem and models it with a Transformer architecture.
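To make the sequence-to-sequence framing concrete, here is a minimal sketch (illustrative only — the array names and masking policy below are assumptions, not the repository's actual code) of how future agent positions can be hidden and then filled in as a masked sequence-completion problem:

```python
import numpy as np

# Illustrative shapes: 16 agents, 24 timesteps, xy positions.
num_agents, num_steps, num_history_steps = 16, 24, 11

positions = np.random.randn(num_agents, num_steps, 2).astype(np.float32)

# Hide everything after the current step; history stays visible.
is_hidden = np.zeros((num_agents, num_steps, 1), dtype=bool)
is_hidden[:, num_history_steps + 1:] = True

# The model consumes the visible steps plus mask tokens for hidden ones and is
# trained to reconstruct the hidden (future) positions -- the same
# sequence-completion setup as masked language modeling.
model_input = np.where(is_hidden, 0.0, positions)
```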
@@ -82,6 +84,21 @@ python train.py --model_base_dir=./models/pedestrians_eth --gin_files=..config/

---

## JRDB Trajectory Prediction Challenge Results
To reproduce our winning results in the [2023 JRDB Trajectory Prediction Challenge](https://jrdb.erc.monash.edu/leaderboards/trajectory):

- Make sure that you follow the [data pre-processing instructions](/human_scene_transformer/data) and pay special attention to where the instructions differentiate between the JRDB Challenge dataset and the original paper dataset.

- Download the trained challenge model [here](https://storage.googleapis.com/gresearch/human_scene_transformer/challenge_checkpoint.zip).

- Run

```
python jrdb/eval_challenge.py --model_path=<path_to_challenge_model_folder> --checkpoint_path=<path_to_challenge_model_folder>/ckpts/ckpt-20 --dataset_path=<dataset_path> --output_path=<result_folder>
```

---

## Evaluation

### JRDB
111 changes: 111 additions & 0 deletions human_scene_transformer/config/jrdb_challenge/dataset_params.gin
@@ -0,0 +1,111 @@
TRAIN_SCENES = ['bytes-cafe-2019-02-07_0',
'clark-center-2019-02-28_0',
'clark-center-intersection-2019-02-28_0',
'cubberly-auditorium-2019-04-22_0',
'gates-159-group-meeting-2019-04-03_0',
'gates-ai-lab-2019-02-08_0',
'gates-to-clark-2019-02-28_1',
'hewlett-packard-intersection-2019-01-24_0',
'huang-basement-2019-01-25_0',
'huang-lane-2019-02-12_0',
'memorial-court-2019-03-16_0',
'meyer-green-2019-03-16_0',
'packard-poster-session-2019-03-20_0',
'packard-poster-session-2019-03-20_1',
'stlc-111-2019-04-19_0',
'svl-meeting-gates-2-2019-04-08_0',
'tressider-2019-03-16_0',
'tressider-2019-03-16_1',
'cubberly-auditorium-2019-04-22_1_test',
'discovery-walk-2019-02-28_0_test',
'food-trucks-2019-02-12_0_test',
'gates-ai-lab-2019-04-17_0_test',
'gates-foyer-2019-01-17_0_test',
'gates-to-clark-2019-02-28_0_test',
'hewlett-class-2019-01-23_1_test',
'huang-2-2019-01-25_1_test',
'indoor-coupa-cafe-2019-02-06_0_test',
'lomita-serra-intersection-2019-01-30_0_test',
'nvidia-aud-2019-01-25_0_test',
'nvidia-aud-2019-04-18_1_test',
'outdoor-coupa-cafe-2019-02-06_0_test',
'quarry-road-2019-02-28_0_test',
'stlc-111-2019-04-19_1_test',
'stlc-111-2019-04-19_2_test',
'tressider-2019-04-26_0_test',
'tressider-2019-04-26_1_test',
'clark-center-2019-02-28_1',
'forbes-cafe-2019-01-22_0',
'gates-basement-elevators-2019-01-17_1',
'huang-2-2019-01-25_0',
'jordan-hall-2019-04-22_0',
'nvidia-aud-2019-04-18_0',
'packard-poster-session-2019-03-20_2',
'svl-meeting-gates-2-2019-04-08_1',
'tressider-2019-04-26_2',
'discovery-walk-2019-02-28_1_test',
'gates-basement-elevators-2019-01-17_0_test',
'hewlett-class-2019-01-23_0_test',
'huang-intersection-2019-01-22_0_test',
'meyer-green-2019-03-16_1_test',
'nvidia-aud-2019-04-18_2_test',
'serra-street-2019-01-30_0_test',
'tressider-2019-03-16_2_test',
'tressider-2019-04-26_3_test']

TEST_SCENES = [
'cubberly-auditorium-2019-04-22_1_test',
'discovery-walk-2019-02-28_0_test',
'discovery-walk-2019-02-28_1_test',
'food-trucks-2019-02-12_0_test',
'gates-ai-lab-2019-04-17_0_test',
'gates-basement-elevators-2019-01-17_0_test',
'gates-foyer-2019-01-17_0_test',
'gates-to-clark-2019-02-28_0_test',
'hewlett-class-2019-01-23_0_test',
'hewlett-class-2019-01-23_1_test',
'huang-2-2019-01-25_1_test',
'huang-intersection-2019-01-22_0_test',
'indoor-coupa-cafe-2019-02-06_0_test',
'lomita-serra-intersection-2019-01-30_0_test',
'meyer-green-2019-03-16_1_test',
'nvidia-aud-2019-01-25_0_test',
'nvidia-aud-2019-04-18_1_test',
'nvidia-aud-2019-04-18_2_test',
'outdoor-coupa-cafe-2019-02-06_0_test',
'quarry-road-2019-02-28_0_test',
'serra-street-2019-01-30_0_test',
'stlc-111-2019-04-19_1_test',
'stlc-111-2019-04-19_2_test',
'tressider-2019-03-16_2_test',
'tressider-2019-04-26_0_test',
'tressider-2019-04-26_1_test',
'tressider-2019-04-26_3_test',
]


JRDBDatasetParams.path = <dataset_path>

JRDBDatasetParams.train_scenes = %TRAIN_SCENES
JRDBDatasetParams.eval_scenes = %TEST_SCENES
JRDBDatasetParams.features = [
'agents/position',
'agents/keypoints',
'robot/position',
'robot/orientation',
'scene/pc'
]

JRDBDatasetParams.train_split = (0., 1.0)
JRDBDatasetParams.eval_split = (0., 1.0)


JRDBDatasetParams.num_history_steps = 11
JRDBDatasetParams.num_steps = 24
JRDBDatasetParams.num_agents = 16
JRDBDatasetParams.timestep = 0.4

JRDBDatasetParams.subsample = 6
JRDBDatasetParams.num_pointcloud_points = 512

JRDBDatasetParams.min_distance_to_robot = 50.0
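A quick sanity check on the timing parameters above: with `num_steps = 24`, `num_history_steps = 11`, and `timestep = 0.4`, the model sees 11 history steps plus the current step and predicts 12 future steps — a 4.8 s horizon, matching the 4.8 s metric cutoffs in `metrics.gin` below. A minimal sketch, assuming the usual history + current + future split:

```python
num_steps = 24          # total sequence length per agent
num_history_steps = 11  # observed past steps (excluding the current step)
timestep = 0.4          # seconds between consecutive steps

num_future_steps = num_steps - num_history_steps - 1  # current step excluded
print(num_future_steps)             # 12
print(num_future_steps * timestep)  # 4.8 -> prediction horizon in seconds
```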
68 changes: 68 additions & 0 deletions human_scene_transformer/config/jrdb_challenge/metrics.gin
@@ -0,0 +1,68 @@
# All available metrics.
min_ade/metrics.ade.MinADE.cutoff_seconds = None
min_ade1s/metrics.ade.MinADE.cutoff_seconds = 1.0
min_ade2s/metrics.ade.MinADE.cutoff_seconds = 2.0
min_ade3s/metrics.ade.MinADE.cutoff_seconds = 3.0
min_ade4s/metrics.ade.MinADE.cutoff_seconds = 4.0

ml_ade/metrics.ade.MLADE.cutoff_seconds = None
ml_ade1s/metrics.ade.MLADE.cutoff_seconds = 1.0
ml_ade2s/metrics.ade.MLADE.cutoff_seconds = 2.0
ml_ade3s/metrics.ade.MLADE.cutoff_seconds = 3.0
ml_ade4s/metrics.ade.MLADE.cutoff_seconds = 4.8

pos_nll/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = None
pos_nll1s/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = 1.0
pos_nll2s/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = 2.0
pos_nll3s/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = 3.0
pos_nll4s/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = 4.8

# Training metrics.
get_metrics.train_metrics = {
'loss': @metrics.Mean,
'loss_position': @metrics.Mean,
'loss_orientation': @metrics.Mean,

'min_ade': @min_ade/metrics.ade.MinADE,
'min_ade1s': @min_ade1s/metrics.ade.MinADE,
'min_ade2s': @min_ade2s/metrics.ade.MinADE,
'min_ade3s': @min_ade3s/metrics.ade.MinADE,
'min_ade4s': @min_ade4s/metrics.ade.MinADE,

'ml_ade': @ml_ade/metrics.ade.MLADE,
'ml_ade1s': @ml_ade1s/metrics.ade.MLADE,
'ml_ade2s': @ml_ade2s/metrics.ade.MLADE,
'ml_ade3s': @ml_ade3s/metrics.ade.MLADE,
'ml_ade4s': @ml_ade4s/metrics.ade.MLADE,

'pos_nll': @pos_nll/metrics.pos_nll.PositionNegativeLogLikelihood,
'pos_nll1s': @pos_nll1s/metrics.pos_nll.PositionNegativeLogLikelihood,
'pos_nll2s': @pos_nll2s/metrics.pos_nll.PositionNegativeLogLikelihood,
'pos_nll3s': @pos_nll3s/metrics.pos_nll.PositionNegativeLogLikelihood,
'pos_nll4s': @pos_nll4s/metrics.pos_nll.PositionNegativeLogLikelihood,
}

# Eval metrics.
get_metrics.eval_metrics = {
'loss': @metrics.Mean,
'loss_position': @metrics.Mean,
'loss_orientation': @metrics.Mean,

'min_ade': @min_ade/metrics.ade.MinADE,
'min_ade1s': @min_ade1s/metrics.ade.MinADE,
'min_ade2s': @min_ade2s/metrics.ade.MinADE,
'min_ade3s': @min_ade3s/metrics.ade.MinADE,
'min_ade4s': @min_ade4s/metrics.ade.MinADE,

'ml_ade': @ml_ade/metrics.ade.MLADE,
'ml_ade1s': @ml_ade1s/metrics.ade.MLADE,
'ml_ade2s': @ml_ade2s/metrics.ade.MLADE,
'ml_ade3s': @ml_ade3s/metrics.ade.MLADE,
'ml_ade4s': @ml_ade4s/metrics.ade.MLADE,

'pos_nll': @pos_nll/metrics.pos_nll.PositionNegativeLogLikelihood,
'pos_nll1s': @pos_nll1s/metrics.pos_nll.PositionNegativeLogLikelihood,
'pos_nll2s': @pos_nll2s/metrics.pos_nll.PositionNegativeLogLikelihood,
'pos_nll3s': @pos_nll3s/metrics.pos_nll.PositionNegativeLogLikelihood,
'pos_nll4s': @pos_nll4s/metrics.pos_nll.PositionNegativeLogLikelihood,
}
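For reference, minimum-over-modes ADE — the headline metric in the config above — averages the per-step Euclidean error over the scored horizon and keeps the best of the predicted modes. A minimal NumPy sketch (the array shapes are assumptions for illustration):

```python
import numpy as np

def min_ade(pred, gt, cutoff_steps=None):
    """Minimum-over-modes average displacement error.

    pred: [num_modes, num_steps, 2] predicted future positions.
    gt:   [num_steps, 2] ground-truth future positions.
    cutoff_steps: score only the first k steps, e.g. a 2 s cutoff
      at 0.4 s per step means k = 5.
    """
    if cutoff_steps is not None:
        pred, gt = pred[:, :cutoff_steps], gt[:cutoff_steps]
    dists = np.linalg.norm(pred - gt, axis=-1)  # [num_modes, num_steps]
    return dists.mean(axis=-1).min()            # best mode's mean error

# Example: 4 modes, 12 future steps, 2 s cutoff.
rng = np.random.default_rng(0)
print(min_ade(rng.normal(size=(4, 12, 2)), rng.normal(size=(12, 2)), cutoff_steps=5))
```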
32 changes: 32 additions & 0 deletions human_scene_transformer/config/jrdb_challenge/model_params.gin
@@ -0,0 +1,32 @@
ModelParams.agents_position_key = 'agents/position'
ModelParams.agents_feature_config = {
'agents/position': @AgentPositionEncoder,
'agents/keypoints': @AgentKeypointsEncoder,
#'agents/gaze': @Agent2DOrientationEncoder,
}
ModelParams.hidden_size = 128
ModelParams.feature_embedding_size = 128
ModelParams.transformer_ff_dim = 128

ModelParams.num_heads = 4
ModelParams.num_modes = 4
ModelParams.scene_encoder = @PointCloudEncoderLayer
ModelParams.attn_architecture = (
'self-attention',
'self-attention',
'cross-attention',
'multimodality_induction',
'self-attention',
'self-attention-mode',
'self-attention',
'self-attention-mode',
)
ModelParams.mask_style = "has_historic_data"
ModelParams.drop_prob = 0.1
ModelParams.prediction_head = @Prediction2DPositionHeadLayer

ModelParams.num_history_steps = 11
ModelParams.num_steps = 24
ModelParams.timestep = 0.4
# Must be one of the classes in is_hidden_generators.py.
ModelParams.is_hidden_generator = @BPIsHiddenGenerator
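The `attn_architecture` tuple reads as an ordered layer stack: agent self-attention and agent-to-scene cross-attention first, then a multimodality-induction step that expands the batch into `num_modes` hypotheses, followed by interleaved attention across agents and across modes. A sketch of how such a tuple could be mapped to layer constructors — the class names here are placeholders, not the repository's actual classes:

```python
# Placeholder layer classes for illustration only.
class SelfAttention: ...
class SelfAttentionOverModes: ...
class CrossAttention: ...
class MultimodalityInduction:
    def __init__(self, num_modes):
        self.num_modes = num_modes

def build_layers(arch, num_modes=4):
    factory = {
        'self-attention': SelfAttention,
        'self-attention-mode': SelfAttentionOverModes,
        'cross-attention': CrossAttention,
        'multimodality_induction': lambda: MultimodalityInduction(num_modes),
    }
    return [factory[name]() for name in arch]

layers = build_layers((
    'self-attention', 'self-attention', 'cross-attention',
    'multimodality_induction',
    'self-attention', 'self-attention-mode',
    'self-attention', 'self-attention-mode',
))
print([type(layer).__name__ for layer in layers])
```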
10 changes: 10 additions & 0 deletions human_scene_transformer/config/jrdb_challenge/training_params.gin
@@ -0,0 +1,10 @@
TrainingParams.batch_size = 64
TrainingParams.shuffle_buffer_size = 10000
TrainingParams.total_train_steps = 2e6
TrainingParams.warmup_steps = 5e4
TrainingParams.peak_learning_rate = 1e-4
#TrainingParams.global_clipnorm = 1.
TrainingParams.batches_per_train_step = 25000
TrainingParams.batches_per_eval_step = 2000
TrainingParams.eval_every_n_step = 1e4
TrainingParams.loss = @MultimodalPositionNLLLoss
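For intuition on the schedule parameters: a common shape — assumed here, the repository's actual schedule may differ — is a linear warmup from zero to `peak_learning_rate` over `warmup_steps`, followed by a decay over the remaining steps. A minimal sketch:

```python
def learning_rate(step, peak_lr=1e-4, warmup_steps=5e4, total_steps=2e6):
    """Linear warmup to peak_lr, then linear decay to zero (assumed shape)."""
    if step < warmup_steps:
        return peak_lr * step / warmup_steps
    frac = (step - warmup_steps) / (total_steps - warmup_steps)
    return peak_lr * (1.0 - frac)

print(learning_rate(25_000))     # mid-warmup: 5e-05
print(learning_rate(50_000))     # peak: 0.0001
print(learning_rate(2_000_000))  # end of training: 0.0
```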
51 changes: 34 additions & 17 deletions human_scene_transformer/data/README.md
@@ -11,23 +11,32 @@
5. Download and extract [Train Detections](https://jrdb.erc.monash.edu/static/downloads/train_detections.zip) from the JRDB 2019 section to `<data_path>/detections`.

## Get the Leaderboard Test Set Tracks
### For the JRDB Challenge Dataset
Download and extract this leaderboard [3D tracking result](https://jrdb.erc.monash.edu/leaderboards/download/1762) to `<data_path>/test_dataset/labels/PiFeNet/`, such that you have `<data_path>/test_dataset/labels/PiFeNet/00XX.txt`.

### For the Original Dataset used in the Paper
Download and extract this leaderboard [3D tracking result](https://jrdb.erc.monash.edu/leaderboards/download/1605) to `<data_path>/test_dataset/labels/ss3d_mot/`, such that you have `<data_path>/test_dataset/labels/ss3d_mot/00XX.txt`. This was the best available leaderboard tracker at the time the method was developed.
## Get the Robot Odometry

Download the compressed Odometry data file [here](https://storage.googleapis.com/gresearch/human_scene_transformer/odometry.zip).

Extract the files and move them to `<data_path>/processed/` such that you have `<data_path>/processed/odometry/train` and `<data_path>/processed/odometry/test`.

Alternatively, you can extract the robot odometry from the raw rosbags yourself via `extract_robot_odometry_from_rosbag.py`.
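If you take the rosbag route, the core of the extraction looks roughly like the sketch below (assuming the ROS1 `rosbag` Python API; the topic name is a placeholder — check `extract_robot_odometry_from_rosbag.py` for the one actually used):

```python
import rosbag  # ROS1 Python API

bag = rosbag.Bag('<scene>.bag')
poses = []
# '/odometry' is a placeholder topic name.
for _topic, msg, t in bag.read_messages(topics=['/odometry']):
    p, q = msg.pose.pose.position, msg.pose.pose.orientation
    poses.append((t.to_sec(), p.x, p.y, q.x, q.y, q.z, q.w))
bag.close()
```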

## Get the Preprocessed Keypoints

Download the compressed Keypoints data file [here](https://storage.googleapis.com/gresearch/human_scene_transformer/keypoints.zip).

Extract the files and move them to `<data_path>/processed/` such that you have `<data_path>/processed/labels/labels_3d_keypoints/train/` and `<data_path>/processed/labels/labels_3d_keypoints/test/`.

## Create Real-World Tracks for Train Data

Run

```python jrdb_train_detections_to_tracks.py --input_path=<data_path>```

## Dataset Folder

@@ -48,22 +57,30 @@ You should end up with a dataset folder of the following structure
- pointclouds
- processed
  - labels
    - labels_3d_keypoints
      - train
      - test
    - labels_detections_3d
  - odometry
    - train
    - test
```

## Generate the Tensorflow Dataset
### For the JRDB Challenge Dataset

```python jrdb_preprocess_train.py --input_path=<data_path> --output_path=<output_path> --max_distance_to_robot=50.0```

```python jrdb_preprocess_test.py --input_path=<data_path> --output_path=<output_path> --max_distance_to_robot=50.0 --tracking_method=PiFeNet --tracking_confidence_threshold=0.01```

### For the Original Dataset used in the Paper

```python jrdb_preprocess_train.py --input_path=<data_path> --output_path=<output_path> --max_distance_to_robot=15.0```

```python jrdb_preprocess_test.py --input_path=<data_path> --output_path=<output_path> --max_distance_to_robot=15.0 --tracking_method=ss3d_mot```

Please note that this can take multiple hours due to the processing of the scenes' pointclouds. If you do not need the pointclouds, you can speed up the processing by passing `--process_pointclouds=False` to both scripts.