hbb1 · hugoycj · Jun 12, 2024 · Jul 23, 2024 · Jul 23, 2024 · Aug 30, 2024
diff --git a/.gitignore b/.gitignore
@@ -11,4 +11,5 @@ data
 *.out
 eval
 *.npz
-**/tmp
+**/tmp
+eval_dtu
diff --git a/.gitmodules b/.gitmodules
@@ -1,6 +1,9 @@
-[submodule "submodules/diff-surfel-rasterization"]
-	path = submodules/diff-surfel-rasterization
-	url = https://github.com/hbb1/diff-surfel-rasterization.git
 [submodule "submodules/simple-knn"]
 	path = submodules/simple-knn
 	url = https://gitlab.inria.fr/bkerbl/simple-knn.git
+[submodule "submodules/diff-surfel-rasterization"]
+	path = submodules/diff-surfel-rasterization
+	url = https://github.com/hugoycj/diff-surfel-rasterization-MCMC
+[submodule "submodules/fused-ssim"]
+	path = submodules/fused-ssim
+	url = https://github.com/rahul-goel/fused-ssim.git
diff --git a/arguments/__init__.py b/arguments/__init__.py
@@ -50,11 +50,14 @@ def __init__(self, parser, sentinel=False):
         self._source_path = ""
         self._model_path = ""
         self._images = "images"
-        self._resolution = -1
+        self._resolution = 1
         self._white_background = False
         self.data_device = "cuda"
         self.eval = False
         self.render_items = ['RGB', 'Alpha', 'Normal', 'Depth', 'Edge', 'Curvature']
+        self.w_normal_prior = ""
+        self.w_mask = ""
+        self.use_decoupled_appearance = False
         super().__init__(parser, "Loading Parameters", sentinel)
 
     def extract(self, args):
@@ -81,17 +84,39 @@ def __init__(self, parser):
         self.opacity_lr = 0.05
         self.scaling_lr = 0.005
         self.rotation_lr = 0.001
+        self.appearance_embeddings_lr = 0.001
+        self.appearance_network_lr = 0.001
         self.percent_dense = 0.01
         self.lambda_dssim = 0.2
-        self.lambda_dist = 0.0
+        self.lambda_dist = 0.
+        self.lambda_depth = 0.1
         self.lambda_normal = 0.05
+        self.lambda_mask = 0.
+        self.lambda_normal_prior = 0.05
+        self.lambda_normal_gradient = 0.01
         self.opacity_cull = 0.05
 
+        self.split_interval = 500
+        self.max_screen_size = 20
+
         self.densification_interval = 100
         self.opacity_reset_interval = 3000
         self.densify_from_iter = 500
         self.densify_until_iter = 15_000
         self.densify_grad_threshold = 0.0002
+
+        self.propagation_interval = 20
+        self.depth_error_min_threshold = 0.8
+        self.depth_error_max_threshold = 1.0
+        self.propagation_begin = 9000
+        self.propagation_after = 15000
+        self.patch_size = 11
+
+        self.pixel_dense_from_iter = 30000
+
+        self.contribution_prune_from_iter = 500
+        self.contribution_prune_interval = 300
+        self.contribution_prune_ratio = 0.1
         super().__init__(parser, "Optimization Parameters")
 
 def get_combined_args(parser : ArgumentParser):

diff --git a/gaussian_renderer/__init__.py b/gaussian_renderer/__init__.py
@@ -16,15 +16,29 @@
 from utils.sh_utils import eval_sh
 from utils.point_utils import depth_to_normal
 
-def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor, scaling_modifier = 1.0, override_color = None):
+def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor, scaling_modifier = 1.0, 
+           override_color = None, record_transmittance=False, bg_gaussians=None, skip_geometric=False):
     """
     Render the scene. 
 
     Background tensor (bg_color) must be on GPU!
     """
-
+    if bg_gaussians is None:
+        means3D = pc.get_xyz
+        opacity = pc.get_opacity
+        scales = pc.get_scaling
+        rotations = pc.get_rotation
+        shs = pc.get_features
+    else:
+        means3D = torch.cat([pc.get_xyz, bg_gaussians.get_xyz])
+        opacity = torch.cat([pc.get_opacity, bg_gaussians.get_opacity])
+        scales = torch.cat([pc.get_scaling, bg_gaussians.get_scaling])
+        rotations = torch.cat([pc.get_rotation, bg_gaussians.get_rotation])
+        shs = torch.cat([pc.get_features, bg_gaussians.get_features])
+    num_fg_points = pc.get_xyz.shape[0]
+
     # Create zero tensor. We will use it to make pytorch return gradients of the 2D (screen-space) means
-    screenspace_points = torch.zeros_like(pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda") + 0
+    screenspace_points = torch.zeros((means3D.shape[0], 4), dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda") + 0
     try:
         screenspace_points.retain_grad()
     except:
@@ -46,74 +60,43 @@ def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor,
         sh_degree=pc.active_sh_degree,
         campos=viewpoint_camera.camera_center,
         prefiltered=False,
+        record_transmittance=record_transmittance,
         debug=False,
         # pipe.debug
     )
 
     rasterizer = GaussianRasterizer(raster_settings=raster_settings)
 
-    means3D = pc.get_xyz
     means2D = screenspace_points
-    opacity = pc.get_opacity
 
-    # If precomputed 3d covariance is provided, use it. If not, then it will be computed from
-    # scaling / rotation by the rasterizer.
-    scales = None
-    rotations = None
-    cov3D_precomp = None
-    if pipe.compute_cov3D_python:
-        # currently don't support normal consistency loss if use precomputed covariance
-        splat2world = pc.get_covariance(scaling_modifier)
-        W, H = viewpoint_camera.image_width, viewpoint_camera.image_height
-        near, far = viewpoint_camera.znear, viewpoint_camera.zfar
-        ndc2pix = torch.tensor([
-            [W / 2, 0, 0, (W-1) / 2],
-            [0, H / 2, 0, (H-1) / 2],
-            [0, 0, far-near, near],
-            [0, 0, 0, 1]]).float().cuda().T
-        world2pix =  viewpoint_camera.full_proj_transform @ ndc2pix
-        cov3D_precomp = (splat2world[:, [0,1,3]] @ world2pix[:,[0,1,3]]).permute(0,2,1).reshape(-1, 9) # column major
-    else:
-        scales = pc.get_scaling
-        rotations = pc.get_rotation
-
-    # If precomputed colors are provided, use them. Otherwise, if it is desired to precompute colors
-    # from SHs in Python, do it. If not, then SH -> RGB conversion will be done by rasterizer.
-    pipe.convert_SHs_python = False
-    shs = None
-    colors_precomp = None
-    if override_color is None:
-        if pipe.convert_SHs_python:
-            shs_view = pc.get_features.transpose(1, 2).view(-1, 3, (pc.max_sh_degree+1)**2)
-            dir_pp = (pc.get_xyz - viewpoint_camera.camera_center.repeat(pc.get_features.shape[0], 1))
-            dir_pp_normalized = dir_pp/dir_pp.norm(dim=1, keepdim=True)
-            sh2rgb = eval_sh(pc.active_sh_degree, shs_view, dir_pp_normalized)
-            colors_precomp = torch.clamp_min(sh2rgb + 0.5, 0.0)
-        else:
-            shs = pc.get_features
-    else:
-        colors_precomp = override_color
-
-    rendered_image, radii, allmap = rasterizer(
+    output = rasterizer(
         means3D = means3D,
         means2D = means2D,
         shs = shs,
-        colors_precomp = colors_precomp,
+        colors_precomp = None,
         opacities = opacity,
         scales = scales,
         rotations = rotations,
-        cov3D_precomp = cov3D_precomp
-    )
-
+        cov3D_precomp = None)
+
+    if record_transmittance:
+        rendered_image, radii, allmap, transmittance_avg, num_covered_pixels = output
+        transmittance_avg = transmittance_avg[:num_fg_points]
+        num_covered_pixels = num_covered_pixels[:num_fg_points]
+    else:
+        rendered_image, radii, allmap = output
+        transmittance_avg = num_covered_pixels = None
+    radii = radii[:num_fg_points]
     # Those Gaussians that were frustum culled or had a radius of 0 were not visible.
     # They will be excluded from value updates used in the splitting criteria.
     rets =  {"render": rendered_image,
             "viewspace_points": means2D,
             "visibility_filter" : radii > 0,
             "radii": radii,
+            "pixels_num":num_covered_pixels,
+            "transmittance_avg": transmittance_avg
     }
 
-
     # additional regularizations
     render_alpha = allmap[1:2]
 
@@ -140,18 +123,24 @@ def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor,
     # for unbounded scene, use expected depth, i.e., depth_ration = 0, to reduce disk anliasing.
     surf_depth = render_depth_expected * (1-pipe.depth_ratio) + (pipe.depth_ratio) * render_depth_median
 
-    # assume the depth points form the 'surface' and generate psudo surface normal for regularizations.
-    surf_normal = depth_to_normal(viewpoint_camera, surf_depth)
-    surf_normal = surf_normal.permute(2,0,1)
-    # remember to multiply with accum_alpha since render_normal is unnormalized.
-    surf_normal = surf_normal * (render_alpha).detach()
-
+    if skip_geometric:
+        # assume the depth points form the 'surface' and generate pseudo surface normal for regularizations.
+        surf_normal_expected = depth_to_normal(viewpoint_camera, render_depth_expected).permute(2,0,1)
+        surf_normal = depth_to_normal(viewpoint_camera, render_depth_median).permute(2,0,1)
+        # remember to multiply with accum_alpha since render_normal is unnormalized.
+        surf_normal_expected = surf_normal_expected * (render_alpha).detach()
+        surf_normal = surf_normal * (render_alpha).detach()
+    else:
+        surf_normal_expected = render_normal
+        surf_normal = render_normal
 
     rets.update({
             'rend_alpha': render_alpha,
             'rend_normal': render_normal,
+            'rend_depth': render_depth_expected,
             'rend_dist': render_dist,
             'surf_depth': surf_depth,
+            'surf_normal_expected': surf_normal_expected,
             'surf_normal': surf_normal,
     })
 

diff --git a/scene/__init__.py b/scene/__init__.py
@@ -14,21 +14,23 @@
 import json
 from utils.system_utils import searchForMaxIteration
 from scene.dataset_readers import sceneLoadTypeCallbacks
-from scene.gaussian_model import GaussianModel
+from scene.gaussian_model import GaussianModel, BgGaussianModel
+from scene.appearance_model import AppearanceModel
 from arguments import ModelParams
 from utils.camera_utils import cameraList_from_camInfos, camera_to_JSON
 
 class Scene:
 
     gaussians : GaussianModel
 
-    def __init__(self, args : ModelParams, gaussians : GaussianModel, load_iteration=None, shuffle=True, resolution_scales=[1.0]):
+    def __init__(self, args : ModelParams, gaussians : GaussianModel, bg_gaussians: BgGaussianModel = None, load_iteration=None, shuffle=True, resolution_scales=[1.0]):
         """b
         :param path: Path to colmap scene main folder.
         """
         self.model_path = args.model_path
         self.loaded_iter = None
         self.gaussians = gaussians
+        self.bg_gaussians = bg_gaussians
 
         if load_iteration:
             if load_iteration == -1:
@@ -79,12 +81,21 @@ def __init__(self, args : ModelParams, gaussians : GaussianModel, load_iteration
                                                            "point_cloud",
                                                            "iteration_" + str(self.loaded_iter),
                                                            "point_cloud.ply"))
+            if self.bg_gaussians is not None:
+                self.bg_gaussians.load_ply(os.path.join(self.model_path,
+                                                           "point_cloud",
+                                                           "iteration_" + str(self.loaded_iter),
+                                                           "bg_point_cloud.ply"))
         else:
             self.gaussians.create_from_pcd(scene_info.point_cloud, self.cameras_extent)
+            if self.bg_gaussians is not None:
+                self.bg_gaussians.load_ply('assets/background_gs.ply')
 
     def save(self, iteration):
         point_cloud_path = os.path.join(self.model_path, "point_cloud/iteration_{}".format(iteration))
         self.gaussians.save_ply(os.path.join(point_cloud_path, "point_cloud.ply"))
+        if self.bg_gaussians is not None:
+            self.bg_gaussians.save_ply(os.path.join(point_cloud_path, "bg_point_cloud.ply"))
 
     def getTrainCameras(self, scale=1.0):
         return self.train_cameras[scale]

diff --git a/scene/appearance_model.py b/scene/appearance_model.py
@@ -0,0 +1,86 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class UpsampleBlock(nn.Module):
+    """One 2x spatial upsampling stage: PixelShuffle -> 3x3 conv -> ReLU.
+
+    ``nn.PixelShuffle(2)`` rearranges channels into space, so the convolution
+    receives ``num_input_channels // 4`` channels. ``num_input_channels`` is
+    therefore expected to be divisible by 4.
+
+    Args:
+        num_input_channels: Channel count of the tensor fed to ``forward``.
+        num_output_channels: Channel count produced by the convolution.
+    """
+
+    def __init__(self, num_input_channels, num_output_channels):
+        super().__init__()
+        shuffle_factor = 2
+        # Pixel shuffle divides the channel count by factor ** 2.
+        channels_after_shuffle = num_input_channels // (shuffle_factor * shuffle_factor)
+        self.pixel_shuffle = nn.PixelShuffle(shuffle_factor)
+        self.conv = nn.Conv2d(
+            channels_after_shuffle,
+            num_output_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+        )
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        """Return ``x`` with doubled height/width and ``num_output_channels`` channels."""
+        return self.relu(self.conv(self.pixel_shuffle(x)))
+
+class AppearanceNetwork(nn.Module):
+    """Small convolutional decoder that upsamples a feature grid by 32x.
+
+    Pipeline: 3x3 conv to 256 channels, four ``UpsampleBlock`` stages
+    (256 -> 128 -> 64 -> 32 -> 16 channels, each doubling height and width),
+    one bilinear 2x interpolation, then two 3x3 convs ending in a sigmoid.
+
+    Args:
+        num_input_channels: Channels of the input feature grid.
+        num_output_channels: Channels of the returned map.
+    """
+
+    def __init__(self, num_input_channels, num_output_channels):
+        super().__init__()
+
+        # Stem: lift the input to the decoder width.
+        self.conv1 = nn.Conv2d(num_input_channels, 256, kernel_size=3, stride=1, padding=1)
+
+        # Four pixel-shuffle stages, halving the channel count at each step.
+        self.up1 = UpsampleBlock(256, 128)
+        self.up2 = UpsampleBlock(128, 64)
+        self.up3 = UpsampleBlock(64, 32)
+        self.up4 = UpsampleBlock(32, 16)
+
+        # Head: refine at full resolution and project to the output channels.
+        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1)
+        self.conv3 = nn.Conv2d(16, num_output_channels, kernel_size=3, stride=1, padding=1)
+        self.relu = nn.ReLU()
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        """Map a ``(B, C, H, W)`` grid to ``(B, num_output_channels, 32*H, 32*W)`` in (0, 1)."""
+        features = self.relu(self.conv1(x))
+        for stage in (self.up1, self.up2, self.up3, self.up4):
+            features = stage(features)
+        # Final 2x step uses bilinear interpolation instead of pixel shuffle.
+        features = F.interpolate(features, scale_factor=2, mode='bilinear', align_corners=True)
+        features = self.relu(self.conv2(features))
+        return self.sigmoid(self.conv3(features))
+
+class AppearanceModel:
+    """Container for a table of learnable embeddings plus an ``AppearanceNetwork``.
+
+    Both the embedding table (``num_embeddings`` rows of 64 values) and the
+    network are created on the CUDA device, so a GPU is required to construct
+    this object.
+
+    Args:
+        num_embeddings: Number of rows in the embedding table.
+        num_input_channels: Input channels of the network. The default 67
+            matches ``3 + 64`` (presumably an RGB image concatenated with one
+            64-d embedding; confirm against the caller).
+        num_output_channels: Output channels of the network.
+    """
+
+    def __init__(self, num_embeddings, num_input_channels=67, num_output_channels=3):
+        self.appearance_network = AppearanceNetwork(num_input_channels, num_output_channels).cuda()
+
+        # Near-zero initialisation of the embedding table.
+        std = 1e-4
+        self._appearance_embeddings = nn.Parameter(torch.empty(num_embeddings, 64).cuda())
+        self._appearance_embeddings.data.normal_(0, std)
+
+    def get_embedding(self, idx):
+        """Return the embedding row(s) selected by ``idx``."""
+        return self._appearance_embeddings[idx]
+
+    def training_setup(self, training_args):
+        """Create ``self.optimizer`` (Adam) with separate learning rates.
+
+        Reads ``training_args.appearance_embeddings_lr`` and
+        ``training_args.appearance_network_lr`` for the two parameter groups.
+        """
+        params = [
+            {'params': [self._appearance_embeddings], 'lr': training_args.appearance_embeddings_lr, "name": "appearance_embeddings"},
+            {'params': self.appearance_network.parameters(), 'lr': training_args.appearance_network_lr, "name": "appearance_network"}
+        ]
+        # The top-level lr is a placeholder; every group overrides it.
+        self.optimizer = torch.optim.Adam(params, lr=0.0, eps=1e-15)
+
+    def load_state_dict(self, state_dict):
+        """Restore embeddings and network weights from ``state_dict()`` output.
+
+        When the stored table has the same shape as the current one, the
+        values are copied in place. The existing ``nn.Parameter`` object (and
+        any optimizer already holding it) stays valid and stays on its current
+        device. When the shape differs, the table is replaced by a new
+        ``nn.Parameter``; call ``training_setup`` again afterwards so the
+        optimizer tracks the new tensor.
+        """
+        loaded = state_dict["_appearance_embeddings"]
+        current = getattr(self, "_appearance_embeddings", None)
+        if isinstance(current, nn.Parameter) and current.shape == loaded.shape:
+            # In-place copy keeps parameter identity, device and requires_grad.
+            with torch.no_grad():
+                current.copy_(loaded)
+        else:
+            target_device = current.device if current is not None else loaded.device
+            self._appearance_embeddings = nn.Parameter(
+                loaded.detach().clone().to(target_device)
+            )
+        self.appearance_network.load_state_dict(state_dict["appearance_network"])
+
+    def state_dict(self):
+        """Return a dict with the embedding table and the network's state dict."""
+        return {
+            "_appearance_embeddings": self._appearance_embeddings,
+            "appearance_network": self.appearance_network.state_dict()
+        }
+
+
+
+if __name__ == "__main__":
+    # Smoke test: push one random feature grid through the network and print
+    # the output shape (spatial size should be 32x the input grid).
+    # Fall back to CPU so the check also runs on machines without a GPU.
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    H, W = 1200//32, 1600//32
+    input_channels = 3 + 64
+    output_channels = 3
+    # Named ``dummy_input`` to avoid shadowing the builtin ``input``.
+    dummy_input = torch.randn(1, input_channels, H, W).to(device)
+    model = AppearanceNetwork(input_channels, output_channels).to(device)
+
+    output = model(dummy_input)
+    print(output.shape)
-Original file line number
+Diff line change
@@ Expand Up / @@ -11,4 +11,5 @@ data @@
     *.out
     eval
     *.npz
-    **/tmp
+    **/tmp
+    eval_dtu