Skip to content

Commit

Permalink
Add multistage module that optionalizes things, add linear incrementi…
Browse files Browse the repository at this point in the history
…ng stage
  • Loading branch information
Purg committed Dec 1, 2024
1 parent 872af86 commit beaaa59
Show file tree
Hide file tree
Showing 2 changed files with 259 additions and 0 deletions.
136 changes: 136 additions & 0 deletions configs/experiment/m2/feat_locsconfs_residualLinear.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# @package _global_

defaults:
- override /data: ptg
- override /model: ptg
- override /callbacks: default
- override /trainer: gpu
- override /paths: default
#- override /logger: aim
- override /logger: csv

# all parameters below will be merged with parameters from default configurations set above
# this allows you to overwrite only specified parameters

# Change this name to something descriptive and unique for this experiment.
# This will differentiate the run logs and output so they are kept separate
# from other experiments that may have been run under the configured root
# directory.
# Setting this value influences:
# - the name of the directory under `${paths.root_dir}/logs/` in which training
# run files are stored.
# Default is "train" set in the "configs/train.yaml" file.
#task_name:

# simply provide checkpoint path to resume training
#ckpt_path: null

tags: ["m2", "ms_tcn", "debug"]

seed: 12345

trainer:
min_epochs: 50
max_epochs: 500
log_every_n_steps: 1

model:
_target_: tcn_hpl.models.components.ms_tcs_net.MultiStageModelResidual
num_classes: 9 # number of activity classification classes
linear_single_stage: true
num_layers: 24 # window_size - 1, for largest residual
do_stage_residual: true
do_stage_softmax: false
compile: false
scheduler:
# Code change to track train/loss instead of val/loss.
factor: 0.9
patience: 10
net:
# Length of feature vector for a single frame.
# Currently derived from the parameterization of dataset vectorizer.
dim: 102

data:
coco_train_activities: "${paths.coco_file_root}/TRAIN-activity_truth.coco.json"
coco_train_objects: "${paths.coco_file_root}/TRAIN-object_detections.coco.json"
coco_train_poses: "${paths.coco_file_root}/TRAIN-pose_estimations.coco.json"

coco_validation_activities: "${paths.coco_file_root}/VALIDATION-activity_truth.coco.json"
coco_validation_objects: "${paths.coco_file_root}/VALIDATION-object_detections.coco.json"
coco_validation_poses: "${paths.coco_file_root}/VALIDATION-pose_estimations.coco.json"

coco_test_activities: "${paths.coco_file_root}/TEST-activity_truth.coco.json"
coco_test_objects: "${paths.coco_file_root}/TEST-object_detections.coco.json"
coco_test_poses: "${paths.coco_file_root}/TEST-pose_estimations.coco.json"

batch_size: 512
num_workers: 16
target_framerate: 15 # BBN Hololens2 Framerate
epoch_sample_factor: 1 # 1x the dataset size iterations for train/val

train_dataset:
window_size: 25
window_label_idx: ${model.pred_frame_index}
vectorize:
_target_: tcn_hpl.data.vectorize.locs_and_confs.LocsAndConfs
top_k: 1
num_classes: 7
use_joint_confs: True
use_pixel_norm: True
use_joint_obj_offsets: False
background_idx: 0
# Augmentations on windows of frame data before performing vectorization.
transform_frame_data:
transforms:
- _target_: tcn_hpl.data.frame_data_aug.window_frame_dropout.DropoutFrameDataTransform
# These parameters are a fudge for now to experiment. Window presence
# looks qualitatively right with what we're seeing live.
frame_rate: ${data.target_framerate}
dets_throughput_mean: 14.5
pose_throughput_mean: 10
dets_latency: 0
pose_latency: 0.1
dets_throughput_std: 0.2
pose_throughput_std: 0.2
fixed_pattern: false
- _target_: tcn_hpl.data.frame_data_aug.rotate_scale_translate_jitter.FrameDataRotateScaleTranslateJitter
translate: 0.05
scale: [0.9, 1.1]
rotate: [-5, 5]
det_loc_jitter: 0.02
det_wh_jitter: 0.02
pose_kp_loc_jitter: 0.005
dets_score_jitter: 0.
pose_score_jitter: 0.
pose_kp_score_jitter: 0.
val_dataset:
# Augmentations on windows of frame data before performing vectorization.
# Only the drop-out augmentation is shared with the training dataset, so that
# stream-processing drop-out is simulated the same way during validation.
transform_frame_data:
transforms:
- _target_: tcn_hpl.data.frame_data_aug.window_frame_dropout.DropoutFrameDataTransform
# Mirror the training hparams, except use fixed patterns.
frame_rate: ${data.target_framerate}
dets_throughput_mean: 14.5
pose_throughput_mean: 10
dets_latency: 0
pose_latency: 0.1
dets_throughput_std: 0.2
pose_throughput_std: 0.2
fixed_pattern: true
# Test dataset usually configured the same as val, unless there is some
# different set of transforms that should be used during test/prediction.

paths:
# Base directory for training outputs.
root_dir: "/home/local/KHQ/cameron.johnson/code/TCN_HPL/tcn_hpl/train-TCN-M2_bbn_hololens/training_root"

# Convenience variable to where your train/val/test split COCO file datasets
# are stored.
coco_file_root: ${paths.root_dir}

#logger:
# aim:
# experiment: ${task_name}
# capture_terminal_logs: true
123 changes: 123 additions & 0 deletions tcn_hpl/models/components/ms_tcs_net.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,103 @@ def forward(self, x, mask):
# Bring it back to input shape [batch_size, feat_dim, window_size]
x = einops.rearrange(re_x, "b w d -> b d w")

# input here is not being softmaxed because dim1 is feature inputs, not
# predictions.
out = self.stage1(x, mask)
# out shape: (batch_size, num_classes, window_size)
outputs = out.unsqueeze(0)
for s in self.stages:
out = s(F.softmax(out, dim=1) * mask[:, None, :], mask)
# out shape: (batch_size, num_classes, window_size)
outputs = torch.cat((outputs, out.unsqueeze(0)), dim=0)

return outputs


class MultiStageModelResidual(nn.Module):
    """
    Multi-stage model with optional stage-to-stage residual connections.

    Similar to the `MultiStageModel` class, however the output of each stage
    after the first may be added to the output of the previous stage
    (`do_stage_residual`), and the softmax applied to the previous stage's
    output before it is input to the next stage is optional
    (`do_stage_softmax`).
    """

    def __init__(
        self,
        fc_sequence_dims: Sequence[int],
        fc_sequence_dropout_p: float,
        num_stages: int,
        num_layers: int,
        num_f_maps: int,
        dim: int,
        num_classes: int,
        linear_single_stage: bool = True,
        do_stage_residual: bool = True,
        do_stage_softmax: bool = False,
    ):
        """Initialize a `MultiStageModelResidual` module.

        :param fc_sequence_dims: Create N*2 linear layers with u-net-like skip
            connections connecting inputs and outputs of the same dimensions.
            If an empty sequence is provided, then no FC layers are created.
        :param fc_sequence_dropout_p: P-value for drop-out layers utilized in
            the FC u-net block.
        :param num_stages: Total number of stages. One first stage is always
            created, followed by `num_stages - 1` successive stages.
        :param num_layers: Number of layers within each stage.
        :param num_f_maps: Feature size within each stage.
        :param dim: Feature size of a single frame's input vector. This is the
            input size of the FC block and of the first stage.
        :param num_classes: Number of output classes. This is also the input
            size of every stage after the first.
        :param linear_single_stage: Use the `SingleStageModelLinear` class for
            single stage layers, otherwise use `SingleStageModel`.
        :param do_stage_residual: Enable adding the previous stage's output to
            each successive stage's output. Default True.
        :param do_stage_softmax: Enable performing a softmax operation on the
            previous stage's output before it is input to the successive
            stage. This only affects the input to a stage: the optional
            residual addition enabled via `do_stage_residual` always uses the
            un-softmaxed previous output. Default False.
        """
        super().__init__()

        # One FC sequence that is applied to a single frame's feature vector.
        self.frame_fc = LinearSkipBlock(
            [dim] + list(fc_sequence_dims), fc_sequence_dropout_p
        )

        stage_class = (
            SingleStageModelLinear if linear_single_stage else SingleStageModel
        )

        # The first stage consumes frame features; later stages consume the
        # class-sized output of the stage before them.
        self.stage1 = stage_class(num_layers, num_f_maps, dim, num_classes)
        self.stages = nn.ModuleList(
            [
                stage_class(num_layers, num_f_maps, num_classes, num_classes)
                for _ in range(num_stages - 1)
            ]
        )

        self.do_stage_residual = do_stage_residual
        self.do_stage_softmax = do_stage_softmax

    def forward(self, x: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
        """Run all stages and return every stage's output.

        :param x: Input features of shape [batch_size, feat_dim, window_size].
        :param mask: Mask of shape [batch_size, window_size]. It is multiplied
            into the input of every stage after the first and is also passed
            to each stage (presumably 1 for valid frames and 0 otherwise --
            confirm against the dataset code).

        :return: Tensor with one leading entry per stage, each entry of shape
            (batch_size, num_classes, window_size).
        """
        # Shape [batch_size, window_size, feat_dim]
        re_x = einops.rearrange(x, "b d w -> b w d")
        re_x = self.frame_fc(re_x)
        # Bring it back to input shape [batch_size, feat_dim, window_size]
        x = einops.rearrange(re_x, "b w d -> b d w")

        # Input here is not being softmaxed because dim1 is feature inputs,
        # not predictions.
        out = self.stage1(x, mask)
        # out shape: (batch_size, num_classes, window_size)
        stage_outputs = [out]

        # Loop-invariant: mask broadcast across the class dimension.
        class_mask = mask[:, None, :]
        for stage in self.stages:
            stage_in = F.softmax(out, dim=1) if self.do_stage_softmax else out
            stage_out = stage(stage_in * class_mask, mask)
            if self.do_stage_residual:
                # The residual uses the raw previous output, not `stage_in`.
                stage_out = out + stage_out
            # Carry this stage's result into the next loop iteration.
            out = stage_out
            # out shape: (batch_size, num_classes, window_size)
            stage_outputs.append(out)

        # Stack once at the end instead of re-concatenating the accumulated
        # tensor on every iteration.
        return torch.stack(stage_outputs, dim=0)
Expand Down Expand Up @@ -158,6 +251,36 @@ def forward(self, x, mask):
return out


class SingleStageModelLinear(nn.Module):
    """
    Version of the SingleStageModel but where the dilation of successive
    layers increases linearly (1, 2, 3, ...) instead of exponentially.

    Input to the forward method should be shape (batch, dim, window_size).
    """

    def __init__(self, num_layers, num_f_maps, dim, num_classes):
        """Initialize a `SingleStageModelLinear` module.

        :param num_layers: Number of dilated residual layers to create. Layer
            `i` (0-based) is constructed with a dilation of `1 + i`.
        :param num_f_maps: Number of channels used inside the stage.
        :param dim: Number of input channels.
        :param num_classes: Number of output channels.
        """
        super().__init__()
        self.conv_1x1 = nn.Conv1d(dim, num_f_maps, 1)
        # Each layer is a freshly constructed module, so no defensive copy is
        # needed before handing it to the ModuleList.
        self.layers = nn.ModuleList(
            [
                DilatedResidualLayer(dilation, num_f_maps, num_f_maps)
                for dilation in range(1, num_layers + 1)
            ]
        )
        self.conv_out = nn.Conv1d(num_f_maps, num_classes, 1)

    def forward(self, x, mask):
        """Apply the stage to a window of features.

        :param x: Input of shape (batch, dim, window_size).
        :param mask: Mask indexed as (batch, window_size). It is passed to
            every layer and multiplied into the final output.

        :return: Output of `conv_out` multiplied by the mask broadcast over
            the channel dimension.
        """
        out = self.conv_1x1(x)
        for layer in self.layers:
            # Per the original author's note, the dilated residual layers
            # already add their output to their input, so no residual
            # addition is repeated here.
            out = layer(out, mask)
        return self.conv_out(out) * mask[:, None, :]


class DilatedResidualLayer(nn.Module):
def __init__(self, dilation, in_channels, out_channels):
super(DilatedResidualLayer, self).__init__()
Expand Down

0 comments on commit beaaa59

Please sign in to comment.