3D Gaussian Splatting CUDA Kernels Explained: Backward Pass (2) BACKWARD::preprocess
Read the previous part first.
# Backward pass of the preprocessing step: BACKWARD::preprocess
It proceeds in two steps:
- Gradients of the 2D covariance matrix: the `computeCov2DCUDA` kernel computes the gradients of each Gaussian's 2D covariance matrix (its projection onto the image plane) with respect to the 3D mean and the 3D covariance.
- Remaining backward steps: the `preprocessCUDA` kernel finishes the 3D mean gradients, propagates the color gradients back to the spherical harmonics (SH) coefficients, and propagates the 3D covariance gradients back to the scale and rotation parameters.
void BACKWARD::preprocess(
int P, int D, int M,
const float3* means3D,
const int* radii,
const float* shs,
const bool* clamped,
const glm::vec3* scales,
const glm::vec4* rotations,
const float scale_modifier,
const float* cov3Ds,
const float* viewmatrix,
const float* projmatrix,
const float focal_x, float focal_y,
const float tan_fovx, float tan_fovy,
const glm::vec3* campos,
const float3* dL_dmean2D, // gradient w.r.t. the 2D mean of each projected Gaussian (computed in BACKWARD::render)
const float* dL_dconic, // gradient w.r.t. the 2D covariance of each projected Gaussian, opacity not included (computed in BACKWARD::render)
glm::vec3* dL_dmean3D, // gradient w.r.t. the 3D mean of each Gaussian (to be computed)
float* dL_dcolor, // gradient w.r.t. the color of each Gaussian (computed in BACKWARD::render)
float* dL_dcov3D, // gradient w.r.t. the 3D covariance matrix of each Gaussian (to be computed)
float* dL_dsh, // gradient w.r.t. the SH coefficients of each Gaussian (to be computed)
glm::vec3* dL_dscale, // gradient w.r.t. the scale parameters of each Gaussian (to be computed)
glm::vec4* dL_drot) // gradient w.r.t. the rotation parameters of each Gaussian (to be computed)
{
// Propagate gradients for the path of 2D conic matrix computation.
// Somewhat long, thus it is its own kernel rather than being part of
// "preprocess". When done, loss gradient w.r.t. 3D means has been
// modified and gradient w.r.t. 3D covariance matrix has been computed.
computeCov2DCUDA <<<(P + 255) / 256, 256>>> (
P,
means3D,
radii,
cov3Ds,
focal_x,
focal_y,
tan_fovx,
tan_fovy,
viewmatrix,
dL_dconic,
(float3*)dL_dmean3D,
dL_dcov3D);
// Propagate gradients for remaining steps: finish 3D mean gradients,
// propagate color gradients to SH (if desired), propagate 3D covariance
// matrix gradients to scale and rotation.
preprocessCUDA<NUM_CHANNELS> <<<(P + 255) / 256, 256>>> (
P, D, M,
(float3*)means3D,
radii,
shs,
clamped,
(glm::vec3*)scales,
(glm::vec4*)rotations,
scale_modifier,
projmatrix,
campos,
(float3*)dL_dmean2D,
(glm::vec3*)dL_dmean3D,
dL_dcolor,
dL_dcov3D,
dL_dsh,
dL_dscale,
dL_drot);
}
# A closer look at computeCov2DCUDA
// Backward version of INVERSE 2D covariance matrix computation
// (due to length launched as separate kernel before other
// backward steps contained in preprocess)
__global__ void computeCov2DCUDA(int P,
const float3* means,
const int* radii,
const float* cov3Ds,
const float h_x, float h_y,
const float tan_fovx, float tan_fovy,
const float* view_matrix,
const float* dL_dconics, // gradient w.r.t. the 2D covariance matrix of each projected Gaussian
float3* dL_dmeans, // gradient w.r.t. the 3D mean of each Gaussian (to be computed here)
float* dL_dcov) // gradient w.r.t. the 3D covariance matrix of each Gaussian (to be computed here)
{
auto idx = cg::this_grid().thread_rank(); // index of the Gaussian handled by this thread
if (idx >= P || !(radii[idx] > 0))
return;
// Reading location of 3D covariance for this Gaussian
const float* cov3D = cov3Ds + 6 * idx; // locate this Gaussian's 3D covariance; symmetric, so only 6 values are stored
// Fetch gradients, recompute 2D covariance and relevant
// intermediate forward results needed in the backward.
float3 mean = means[idx]; // this Gaussian's 3D mean (center)
float3 dL_dconic = { dL_dconics[4 * idx], dL_dconics[4 * idx + 1], dL_dconics[4 * idx + 3] }; // conic gradients computed in renderCUDA
float3 t = transformPoint4x3(mean, view_matrix); // transform the center into camera space
# Clamping the Gaussian center coordinates
As in the forward pass, the camera-space coordinates are clamped for numerical stability, to avoid precision overflow:
const float limx = 1.3f * tan_fovx;
const float limy = 1.3f * tan_fovy;
const float txtz = t.x / t.z; // divide x and y by z
const float tytz = t.y / t.z;
t.x = min(limx, max(-limx, txtz)) * t.z; // clamp, then multiply z back in
t.y = min(limy, max(-limy, tytz)) * t.z;
Unlike the forward pass, there is one extra step here: a 0/1 flag marks the Gaussians whose center was clamped; it is used later to zero out their x/y mean gradients:
const float x_grad_mul = txtz < -limx || txtz > limx ? 0 : 1; // 0/1 flag: center out of range in x
const float y_grad_mul = tytz < -limy || tytz > limy ? 0 : 1;
# The Jacobian matrix J
Recall the Jacobian J from the forward pass; it holds the derivatives of the 2D image coordinates $(u, v)$ with respect to the 3D camera-space coordinates $(x, y, z)$:
$$J = \frac{\partial (u, v)}{\partial (x, y, z)} = \begin{bmatrix} \frac{\partial u}{\partial x} & \frac{\partial u}{\partial y} & \frac{\partial u}{\partial z} \\ \frac{\partial v}{\partial x} & \frac{\partial v}{\partial y} & \frac{\partial v}{\partial z} \end{bmatrix} = \begin{bmatrix} \frac{f_x}{z} & 0 & -\frac{f_x x}{z^2} \\ 0 & \frac{f_y}{z} & -\frac{f_y y}{z^2} \end{bmatrix}$$
glm::mat3 J = glm::mat3(h_x / t.z, 0.0f, -(h_x * t.x) / (t.z * t.z),
0.0f, h_y / t.z, -(h_y * t.y) / (t.z * t.z),
0, 0, 0);
2
3
The `J` in the code gets an extra row of zeros so that it fits glm's 3x3 matrix type. The only difference from the forward-pass code is that `h_x` and `h_y` here denote $f_x$ and $f_y$; everything else is identical.
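To double-check these entries, a quick host-side finite-difference test works well. The sketch below is illustrative only (not part of the repository); it compares the analytic J against central differences of the pinhole projection $u = f_x x / z$, $v = f_y y / z$:
#include <cstdio>

// Illustrative host-side check (not repository code): compare the analytic
// Jacobian of the pinhole projection (u, v) = (fx*x/z, fy*y/z) against
// central finite differences.
int main()
{
    const float fx = 500.f, fy = 480.f, eps = 1e-3f;
    const float t[3] = { 0.3f, -0.2f, 2.0f }; // camera-space point (x, y, z)
    // Analytic J, same entries the kernel builds (padding row of zeros omitted).
    const float J[2][3] = {
        { fx / t[2], 0.f, -fx * t[0] / (t[2] * t[2]) },
        { 0.f, fy / t[2], -fy * t[1] / (t[2] * t[2]) }
    };
    auto proj = [&](const float p[3], float& u, float& v) {
        u = fx * p[0] / p[2];
        v = fy * p[1] / p[2];
    };
    for (int k = 0; k < 3; k++)
    {
        float p[3] = { t[0], t[1], t[2] }, m[3] = { t[0], t[1], t[2] };
        p[k] += eps; m[k] -= eps;
        float up, vp, um, vm;
        proj(p, up, vp);
        proj(m, um, vm);
        printf("du/dt%d analytic % .5f numeric % .5f\n", k, J[0][k], (up - um) / (2 * eps));
        printf("dv/dt%d analytic % .5f numeric % .5f\n", k, J[1][k], (vp - vm) / (2 * eps));
    }
}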
# The 2D covariance matrix Σ′
Recall the formula for the 2D covariance matrix $\Sigma'$ from the forward pass:
$$\Sigma' = J W \Sigma W^\top J^\top$$
where $W$ is the upper-left 3x3 block of the view matrix. The `W` in the code corresponds to $W^\top$:
glm::mat3 W = glm::mat3(
view_matrix[0], view_matrix[4], view_matrix[8],
view_matrix[1], view_matrix[5], view_matrix[9],
view_matrix[2], view_matrix[6], view_matrix[10]);
`Vrk` is simply $\Sigma$:
glm::mat3 Vrk = glm::mat3(
cov3D[0], cov3D[1], cov3D[2],
cov3D[1], cov3D[3], cov3D[4],
cov3D[2], cov3D[4], cov3D[5]);
Let $T = JW$; then $\Sigma' = T \Sigma T^\top$:
glm::mat3 T = W * J;
glm::mat3 cov2D = glm::transpose(T) * glm::transpose(Vrk) * T;
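The transposes here are a glm storage detail: glm's mat3 constructor consumes its nine scalars column by column, so `W`, `J`, and `Vrk` as written all store the transposes of the corresponding math matrices, and `T = W * J` evaluates $W^\top J^\top = (JW)^\top = T^\top$. A small sketch of the assumed convention (not repository code):
#include <glm/glm.hpp>
#include <cstdio>

// Illustrative convention check (not repository code): glm::mat3's scalar
// constructor fills the matrix column by column, so writing a math matrix
// "row by row" into it actually stores the transpose.
int main()
{
    // Intended math matrix: rows (1 2 3), (4 5 6), (7 8 9), written row by row.
    glm::mat3 M(1, 2, 3,
                4, 5, 6,
                7, 8, 9);
    // In glm, M[0] is the first COLUMN of the stored matrix; it comes out
    // as (1, 2, 3), i.e. the first ROW of the intended math matrix.
    printf("M[0] = (%g, %g, %g)\n", M[0][0], M[0][1], M[0][2]);
}
With that in mind, `glm::transpose(T) * glm::transpose(Vrk) * T` is exactly $T \Sigma T^\top$ in math notation ($\Sigma$ is symmetric, so transposing `Vrk` is harmless).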
# Gradient with respect to the 3D covariance matrix Σ
`BACKWARD::render` has already computed the gradient of the loss with respect to the conic parameters of each projected Gaussian $i$ (the inverse 2D covariance matrix), $\frac{\partial L}{\partial \Sigma'^{-1}}$ (written $\frac{\partial L}{\partial (A,B,C)}$ below; `dL_dconic2D` in the code), and with respect to its 2D center, $\frac{\partial L}{\partial (u,v)}$ (written $\frac{\partial L}{\partial (x_i,y_i)}$ below; `dL_dmean2D` in the code).
What remains is to compute the gradients with respect to each Gaussian's 3D center $(x, y, z)$ and 3D covariance matrix $\Sigma$.
Given $\frac{\partial L}{\partial \Sigma'^{-1}}$, write out the 2D covariance matrix $\Sigma'$ and its inverse $\Sigma'^{-1}$ entry by entry:
$$\Sigma' = \begin{bmatrix} a & b \\ b & c \end{bmatrix}, \qquad \Sigma'^{-1} = \begin{bmatrix} A & B \\ B & C \end{bmatrix}$$
Expanding $\frac{\partial L}{\partial \Sigma}$ with the chain rule, and writing $\Sigma_{ij}$ for the entry in row $i$, column $j$ of $\Sigma$, the derivative of $L$ with respect to each entry of $\Sigma$ is:
$$\begin{aligned}
\frac{\partial L}{\partial \Sigma_{ij}} &= \frac{\partial L}{\partial A}\frac{\partial A}{\partial \Sigma_{ij}} + 2\frac{\partial L}{\partial B}\frac{\partial B}{\partial \Sigma_{ij}} + \frac{\partial L}{\partial C}\frac{\partial C}{\partial \Sigma_{ij}} \\
&= \frac{\partial L}{\partial A}\left(\frac{\partial A}{\partial a}\frac{\partial a}{\partial \Sigma_{ij}} + \frac{\partial A}{\partial b}\frac{\partial b}{\partial \Sigma_{ij}} + \frac{\partial A}{\partial c}\frac{\partial c}{\partial \Sigma_{ij}}\right) + 2\frac{\partial L}{\partial B}\left(\frac{\partial B}{\partial a}\frac{\partial a}{\partial \Sigma_{ij}} + \frac{\partial B}{\partial b}\frac{\partial b}{\partial \Sigma_{ij}} + \frac{\partial B}{\partial c}\frac{\partial c}{\partial \Sigma_{ij}}\right) + \frac{\partial L}{\partial C}\left(\frac{\partial C}{\partial a}\frac{\partial a}{\partial \Sigma_{ij}} + \frac{\partial C}{\partial b}\frac{\partial b}{\partial \Sigma_{ij}} + \frac{\partial C}{\partial c}\frac{\partial c}{\partial \Sigma_{ij}}\right) \\
&= \left(\frac{\partial L}{\partial A}\frac{\partial A}{\partial a} + 2\frac{\partial L}{\partial B}\frac{\partial B}{\partial a} + \frac{\partial L}{\partial C}\frac{\partial C}{\partial a}\right)\frac{\partial a}{\partial \Sigma_{ij}} + \left(\frac{\partial L}{\partial A}\frac{\partial A}{\partial b} + 2\frac{\partial L}{\partial B}\frac{\partial B}{\partial b} + \frac{\partial L}{\partial C}\frac{\partial C}{\partial b}\right)\frac{\partial b}{\partial \Sigma_{ij}} + \left(\frac{\partial L}{\partial A}\frac{\partial A}{\partial c} + 2\frac{\partial L}{\partial B}\frac{\partial B}{\partial c} + \frac{\partial L}{\partial C}\frac{\partial C}{\partial c}\right)\frac{\partial c}{\partial \Sigma_{ij}} \\
&= \frac{\partial L}{\partial a}\frac{\partial a}{\partial \Sigma_{ij}} + \frac{\partial L}{\partial b}\frac{\partial b}{\partial \Sigma_{ij}} + \frac{\partial L}{\partial c}\frac{\partial c}{\partial \Sigma_{ij}}
\end{aligned}$$
So we first compute the gradients $\frac{\partial L}{\partial (a,b,c)}$ with respect to the scalar entries of the 2D covariance matrix $\Sigma'$, and then $\frac{\partial L}{\partial \Sigma_{ij}}$.
# First: ∂L/∂(a, b, c)
Using the 2x2 matrix inverse formula, write out $\Sigma'^{-1}$, i.e., the relation between $A, B, C$ and $a, b, c$:
$$\Sigma'^{-1} = \begin{bmatrix} A & B \\ B & C \end{bmatrix} = \frac{1}{ac - b^2}\begin{bmatrix} c & -b \\ -b & a \end{bmatrix}$$
This gives $\frac{\partial L}{\partial a}$, $\frac{\partial L}{\partial b}$, and $\frac{\partial L}{\partial c}$:
$$\begin{aligned}
\frac{\partial L}{\partial a} &= \frac{\partial L}{\partial A}\frac{\partial A}{\partial a} + 2\frac{\partial L}{\partial B}\frac{\partial B}{\partial a} + \frac{\partial L}{\partial C}\frac{\partial C}{\partial a} = \frac{\partial}{\partial a}\left(\frac{c\frac{\partial L}{\partial A} - 2b\frac{\partial L}{\partial B} + a\frac{\partial L}{\partial C}}{ac - b^2}\right) = \frac{-c^2\frac{\partial L}{\partial A} + 2bc\frac{\partial L}{\partial B} - b^2\frac{\partial L}{\partial C}}{(ac - b^2)^2} \\
\frac{\partial L}{\partial b} &= \frac{\partial L}{\partial A}\frac{\partial A}{\partial b} + 2\frac{\partial L}{\partial B}\frac{\partial B}{\partial b} + \frac{\partial L}{\partial C}\frac{\partial C}{\partial b} = \frac{\partial}{\partial b}\left(\frac{c\frac{\partial L}{\partial A} - 2b\frac{\partial L}{\partial B} + a\frac{\partial L}{\partial C}}{ac - b^2}\right) = \frac{2bc\frac{\partial L}{\partial A} - 2(ac + b^2)\frac{\partial L}{\partial B} + 2ab\frac{\partial L}{\partial C}}{(ac - b^2)^2} \\
\frac{\partial L}{\partial c} &= \frac{\partial L}{\partial A}\frac{\partial A}{\partial c} + 2\frac{\partial L}{\partial B}\frac{\partial B}{\partial c} + \frac{\partial L}{\partial C}\frac{\partial C}{\partial c} = \frac{\partial}{\partial c}\left(\frac{c\frac{\partial L}{\partial A} - 2b\frac{\partial L}{\partial B} + a\frac{\partial L}{\partial C}}{ac - b^2}\right) = \frac{-b^2\frac{\partial L}{\partial A} + 2ab\frac{\partial L}{\partial B} - a^2\frac{\partial L}{\partial C}}{(ac - b^2)^2}
\end{aligned}$$
// Use helper variables for 2D covariance entries. More compact.
float a = cov2D[0][0] += 0.3f;
float b = cov2D[0][1];
float c = cov2D[1][1] += 0.3f;
float denom = a * c - b * b;
float dL_da = 0, dL_db = 0, dL_dc = 0;
float denom2inv = 1.0f / ((denom * denom) + 0.0000001f);
if (denom2inv != 0)
{
// Gradients of loss w.r.t. entries of 2D covariance matrix,
// given gradients of loss w.r.t. conic matrix (inverse covariance matrix).
// e.g., dL / da = dL / d_conic_a * d_conic_a / d_a
dL_da = denom2inv * (-c * c * dL_dconic.x + 2 * b * c * dL_dconic.y + (denom - a * c) * dL_dconic.z);
dL_dc = denom2inv * (-a * a * dL_dconic.z + 2 * a * b * dL_dconic.y + (denom - a * c) * dL_dconic.x);
dL_db = denom2inv * 2 * (b * c * dL_dconic.x - (denom + 2 * b * b) * dL_dconic.y + a * b * dL_dconic.z);
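A quick way to validate this algebra is to linearize the loss around the current conic, $L \approx \frac{\partial L}{\partial A}A + 2\frac{\partial L}{\partial B}B + \frac{\partial L}{\partial C}C$, and compare the analytic gradients against central differences. The host-side sketch below is illustrative only (arbitrary upstream gradient values, not repository code):
#include <cstdio>

// Illustrative check (not repository code): dL/d(a,b,c) given dL/d(A,B,C),
// where (A, B, C) is the inverse of the symmetric 2x2 matrix [[a, b], [b, c]].
static void conic(float a, float b, float c, float& A, float& B, float& C)
{
    float det = a * c - b * b;
    A = c / det; B = -b / det; C = a / det;
}

int main()
{
    float a = 2.1f, b = 0.4f, c = 1.7f;
    float dL_dA = 0.3f, dL_dB = -0.8f, dL_dC = 0.5f; // arbitrary upstream gradients
    float denom = a * c - b * b;
    float d2 = 1.f / (denom * denom);
    // Analytic gradients in the same form as the kernel; note that
    // (denom - a*c) = -b*b, so these match the code above exactly.
    float grads[3] = {
        d2 * (-c * c * dL_dA + 2 * b * c * dL_dB - b * b * dL_dC),
        d2 * 2 * (b * c * dL_dA - (a * c + b * b) * dL_dB + a * b * dL_dC),
        d2 * (-b * b * dL_dA + 2 * a * b * dL_dB - a * a * dL_dC)
    };
    const float eps = 1e-3f;
    for (int k = 0; k < 3; k++)
    {
        float p[3] = { a, b, c }, m[3] = { a, b, c };
        p[k] += eps; m[k] -= eps;
        float Ap, Bp, Cp, Am, Bm, Cm;
        conic(p[0], p[1], p[2], Ap, Bp, Cp);
        conic(m[0], m[1], m[2], Am, Bm, Cm);
        // Linearized loss L = dL_dA*A + 2*dL_dB*B + dL_dC*C.
        float Lp = dL_dA * Ap + 2 * dL_dB * Bp + dL_dC * Cp;
        float Lm = dL_dA * Am + 2 * dL_dB * Bm + dL_dC * Cm;
        printf("grad %d: analytic % .6f numeric % .6f\n", k, grads[k], (Lp - Lm) / (2 * eps));
    }
}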
# Then: ∂L/∂Σij
Expand the 2D covariance matrix $\Sigma'$ in terms of $T$:
$$\Sigma' = \begin{bmatrix} a & b \\ b & c \end{bmatrix} = T \Sigma T^\top = \begin{bmatrix} T_1 \\ T_2 \end{bmatrix}\Sigma\begin{bmatrix} T_1^\top & T_2^\top \end{bmatrix} = \begin{bmatrix} T_1 \Sigma T_1^\top & T_1 \Sigma T_2^\top \\ T_2 \Sigma T_1^\top & T_2 \Sigma T_2^\top \end{bmatrix}$$
where $T_1$ and $T_2$ denote the first and second row vectors of $T$. Writing $T_{ij}$ for the entry in row $i$, column $j$ of $T$, we can expand further and obtain the derivatives of $a$, $b$, and $c$ with respect to each entry of $\Sigma$:
$$\begin{aligned}
a &= T_1 \Sigma T_1^\top = \sum_{i,j\in\{1,2,3\}} T_{1i}\Sigma_{ij}T_{1j} \;\Rightarrow\; \frac{\partial a}{\partial \Sigma_{ij}} = T_{1i}T_{1j} \\
c &= T_2 \Sigma T_2^\top = \sum_{i,j\in\{1,2,3\}} T_{2i}\Sigma_{ij}T_{2j} \;\Rightarrow\; \frac{\partial c}{\partial \Sigma_{ij}} = T_{2i}T_{2j}
\end{aligned}$$
For $b$ there are two paths, one through the upper-right entry and one through the lower-left entry:
$$\begin{aligned}
b &= T_1 \Sigma T_2^\top = \sum_{i,j\in\{1,2,3\}} T_{1i}\Sigma_{ij}T_{2j} \;\Rightarrow\; \frac{\partial b}{\partial \Sigma_{ij}} = T_{1i}T_{2j} \\
b &= T_2 \Sigma T_1^\top = \sum_{i,j\in\{1,2,3\}} T_{2i}\Sigma_{ij}T_{1j} \;\Rightarrow\; \frac{\partial b}{\partial \Sigma_{ij}} = T_{2i}T_{1j}
\end{aligned}$$
so the derivative of $b$ is taken as the average of the two paths:
$$\frac{\partial b}{\partial \Sigma_{ij}} = \frac{T_{1i}T_{2j} + T_{2i}T_{1j}}{2}$$
The derivative of $L$ with respect to each entry of $\Sigma$ is therefore:
$$\frac{\partial L}{\partial \Sigma_{ij}} = \frac{\partial L}{\partial a}\frac{\partial a}{\partial \Sigma_{ij}} + \frac{\partial L}{\partial b}\frac{\partial b}{\partial \Sigma_{ij}} + \frac{\partial L}{\partial c}\frac{\partial c}{\partial \Sigma_{ij}} = \frac{\partial L}{\partial a}T_{1i}T_{1j} + \frac{\partial L}{\partial b}\frac{T_{1i}T_{2j} + T_{2i}T_{1j}}{2} + \frac{\partial L}{\partial c}T_{2i}T_{2j}$$
// Gradients of loss L w.r.t. each 3D covariance matrix (Vrk) entry,
// given gradients w.r.t. 2D covariance matrix (diagonal).
// cov2D = transpose(T) * transpose(Vrk) * T;
dL_dcov[6 * idx + 0] = (T[0][0] * T[0][0] * dL_da + T[0][0] * T[1][0] * dL_db + T[1][0] * T[1][0] * dL_dc);
dL_dcov[6 * idx + 3] = (T[0][1] * T[0][1] * dL_da + T[0][1] * T[1][1] * dL_db + T[1][1] * T[1][1] * dL_dc);
dL_dcov[6 * idx + 5] = (T[0][2] * T[0][2] * dL_da + T[0][2] * T[1][2] * dL_db + T[1][2] * T[1][2] * dL_dc);
// Gradients of loss L w.r.t. each 3D covariance matrix (Vrk) entry,
// given gradients w.r.t. 2D covariance matrix (off-diagonal).
// Off-diagonal elements appear twice --> double the gradient.
// cov2D = transpose(T) * transpose(Vrk) * T;
dL_dcov[6 * idx + 1] = 2 * T[0][0] * T[0][1] * dL_da + (T[0][0] * T[1][1] + T[0][1] * T[1][0]) * dL_db + 2 * T[1][0] * T[1][1] * dL_dc;
dL_dcov[6 * idx + 2] = 2 * T[0][0] * T[0][2] * dL_da + (T[0][0] * T[1][2] + T[0][2] * T[1][0]) * dL_db + 2 * T[1][0] * T[1][2] * dL_dc;
dL_dcov[6 * idx + 4] = 2 * T[0][2] * T[0][1] * dL_da + (T[0][1] * T[1][2] + T[0][2] * T[1][1]) * dL_db + 2 * T[1][1] * T[1][2] * dL_dc;
}
else
{
for (int i = 0; i < 6; i++)
dL_dcov[6 * idx + i] = 0;
}
The matrix formed by the entries $\frac{\partial L}{\partial \Sigma_{ij}}$ is symmetric. In the code, `dL_dcov` stores only its upper triangle, with the two mirrored off-diagonal contributions summed (the factor of 2 above). Later, where `computeCov3D` consumes `dL_dcov` (named `dL_dcov3D` there), it is expanded back into a full symmetric matrix with the off-diagonal entries multiplied by 0.5.
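The two conventions cancel out, as the following illustrative round trip shows (not repository code):
#include <glm/glm.hpp>

// Illustrative round trip (not repository code): computeCov2DCUDA packs the
// symmetric gradient matrix dL/dSigma into 6 floats with the two mirrored
// off-diagonal contributions summed; computeCov3D unpacks with a 0.5 factor,
// recovering the original per-entry gradients of the symmetric matrix.
glm::mat3 packUnpack(const glm::mat3& G) // G assumed symmetric: G[i][j] == G[j][i]
{
    // Pack as in computeCov2DCUDA: diagonal as-is, off-diagonals doubled.
    float packed[6] = {
        G[0][0], 2 * G[0][1], 2 * G[0][2],
        G[1][1], 2 * G[1][2], G[2][2]
    };
    // Unpack as in computeCov3D: halve the off-diagonal entries.
    return glm::mat3(
        packed[0], 0.5f * packed[1], 0.5f * packed[2],
        0.5f * packed[1], packed[3], 0.5f * packed[4],
        0.5f * packed[2], 0.5f * packed[4], packed[5]); // == G
}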
# Gradient with respect to the 3D Gaussian mean (x, y, z)
Gradient with respect to $T$. Since $a = T_1 \Sigma T_1^\top$, $c = T_2 \Sigma T_2^\top$, and $b$ averages the two paths through $T_1 \Sigma T_2^\top$, symmetry of $\Sigma$ gives $\frac{\partial L}{\partial T_{1k}} = 2\,\frac{\partial L}{\partial a}(\Sigma T_1^\top)_k + \frac{\partial L}{\partial b}(\Sigma T_2^\top)_k$ and $\frac{\partial L}{\partial T_{2k}} = 2\,\frac{\partial L}{\partial c}(\Sigma T_2^\top)_k + \frac{\partial L}{\partial b}(\Sigma T_1^\top)_k$:
// Gradients of loss w.r.t. upper 2x3 portion of intermediate matrix T
// cov2D = transpose(T) * transpose(Vrk) * T;
float dL_dT00 = 2 * (T[0][0] * Vrk[0][0] + T[0][1] * Vrk[0][1] + T[0][2] * Vrk[0][2]) * dL_da +
(T[1][0] * Vrk[0][0] + T[1][1] * Vrk[0][1] + T[1][2] * Vrk[0][2]) * dL_db;
float dL_dT01 = 2 * (T[0][0] * Vrk[1][0] + T[0][1] * Vrk[1][1] + T[0][2] * Vrk[1][2]) * dL_da +
(T[1][0] * Vrk[1][0] + T[1][1] * Vrk[1][1] + T[1][2] * Vrk[1][2]) * dL_db;
float dL_dT02 = 2 * (T[0][0] * Vrk[2][0] + T[0][1] * Vrk[2][1] + T[0][2] * Vrk[2][2]) * dL_da +
(T[1][0] * Vrk[2][0] + T[1][1] * Vrk[2][1] + T[1][2] * Vrk[2][2]) * dL_db;
float dL_dT10 = 2 * (T[1][0] * Vrk[0][0] + T[1][1] * Vrk[0][1] + T[1][2] * Vrk[0][2]) * dL_dc +
(T[0][0] * Vrk[0][0] + T[0][1] * Vrk[0][1] + T[0][2] * Vrk[0][2]) * dL_db;
float dL_dT11 = 2 * (T[1][0] * Vrk[1][0] + T[1][1] * Vrk[1][1] + T[1][2] * Vrk[1][2]) * dL_dc +
(T[0][0] * Vrk[1][0] + T[0][1] * Vrk[1][1] + T[0][2] * Vrk[1][2]) * dL_db;
float dL_dT12 = 2 * (T[1][0] * Vrk[2][0] + T[1][1] * Vrk[2][1] + T[1][2] * Vrk[2][2]) * dL_dc +
(T[0][0] * Vrk[2][0] + T[0][1] * Vrk[2][1] + T[0][2] * Vrk[2][2]) * dL_db;
Gradient with respect to $J$. Since $T = JW$, we have $T_{mj} = \sum_k J_{mk} W_{kj}$, so $\frac{\partial L}{\partial J_{mk}} = \sum_j \frac{\partial L}{\partial T_{mj}} W_{kj}$; only the four non-zero entries of $J$ matter:
// Gradients of loss w.r.t. upper 3x2 non-zero entries of Jacobian matrix
// T = W * J
float dL_dJ00 = W[0][0] * dL_dT00 + W[0][1] * dL_dT01 + W[0][2] * dL_dT02;
float dL_dJ02 = W[2][0] * dL_dT00 + W[2][1] * dL_dT01 + W[2][2] * dL_dT02;
float dL_dJ11 = W[1][0] * dL_dT10 + W[1][1] * dL_dT11 + W[1][2] * dL_dT12;
float dL_dJ12 = W[2][0] * dL_dT10 + W[2][1] * dL_dT11 + W[2][2] * dL_dT12;
Gradient with respect to the camera-space center $t = (t_x, t_y, t_z)$ and then the world-space mean, $\frac{\partial L}{\partial (x,y,z)}$ (a derivation sketch follows the code):
float tz = 1.f / t.z;
float tz2 = tz * tz;
float tz3 = tz2 * tz;
// Gradients of loss w.r.t. transformed Gaussian mean t
float dL_dtx = x_grad_mul * -h_x * tz2 * dL_dJ02;
float dL_dty = y_grad_mul * -h_y * tz2 * dL_dJ12;
float dL_dtz = -h_x * tz2 * dL_dJ00 - h_y * tz2 * dL_dJ11 + (2 * h_x * t.x) * tz3 * dL_dJ02 + (2 * h_y * t.y) * tz3 * dL_dJ12;
// Account for transformation of mean to t
// t = transformPoint4x3(mean, view_matrix);
float3 dL_dmean = transformVec4x3Transpose({ dL_dtx, dL_dty, dL_dtz }, view_matrix);
// Gradients of loss w.r.t. Gaussian means, but only the portion
// that is caused because the mean affects the covariance matrix.
// Additional mean gradient is accumulated in BACKWARD::preprocess.
dL_dmeans[idx] = dL_dmean;
}
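For reference, the `dL_dt*` lines follow from differentiating the non-zero entries of $J$ with respect to $t$ (only $J$ depends on $t$ here):
$$J_{11} = \frac{f_x}{t_z},\quad J_{13} = -\frac{f_x t_x}{t_z^2},\quad J_{22} = \frac{f_y}{t_z},\quad J_{23} = -\frac{f_y t_y}{t_z^2}$$
$$\frac{\partial L}{\partial t_x} = -\frac{f_x}{t_z^2}\frac{\partial L}{\partial J_{13}},\qquad \frac{\partial L}{\partial t_y} = -\frac{f_y}{t_z^2}\frac{\partial L}{\partial J_{23}}$$
$$\frac{\partial L}{\partial t_z} = -\frac{f_x}{t_z^2}\frac{\partial L}{\partial J_{11}} - \frac{f_y}{t_z^2}\frac{\partial L}{\partial J_{22}} + \frac{2 f_x t_x}{t_z^3}\frac{\partial L}{\partial J_{13}} + \frac{2 f_y t_y}{t_z^3}\frac{\partial L}{\partial J_{23}}$$
The `x_grad_mul`/`y_grad_mul` flags zero the $t_x$/$t_y$ contributions for the clamped Gaussians. Finally, since $t = Wm + d$ (the view transform), $\frac{\partial L}{\partial m} = W^\top \frac{\partial L}{\partial t}$, which is exactly what `transformVec4x3Transpose` computes.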
# A closer look at preprocessCUDA
// Backward pass of the preprocessing steps, except
// for the covariance computation and inversion
// (those are handled by a previous kernel call)
template<int C>
__global__ void preprocessCUDA(
int P, int D, int M,
const float3* means,
const int* radii,
const float* shs,
const bool* clamped,
const glm::vec3* scales,
const glm::vec4* rotations,
const float scale_modifier,
const float* proj,
const glm::vec3* campos,
const float3* dL_dmean2D,
glm::vec3* dL_dmeans,
float* dL_dcolor,
float* dL_dcov3D,
float* dL_dsh,
glm::vec3* dL_dscale,
glm::vec4* dL_drot)
{
auto idx = cg::this_grid().thread_rank();
if (idx >= P || !(radii[idx] > 0))
return;
float3 m = means[idx];
// Taking care of gradients from the screenspace points
float4 m_hom = transformPoint4x4(m, proj);
float m_w = 1.0f / (m_hom.w + 0.0000001f);
// Compute loss gradient w.r.t. 3D means due to gradients of 2D means
// from rendering procedure
glm::vec3 dL_dmean;
float mul1 = (proj[0] * m.x + proj[4] * m.y + proj[8] * m.z + proj[12]) * m_w * m_w;
float mul2 = (proj[1] * m.x + proj[5] * m.y + proj[9] * m.z + proj[13]) * m_w * m_w;
dL_dmean.x = (proj[0] * m_w - proj[3] * mul1) * dL_dmean2D[idx].x + (proj[1] * m_w - proj[3] * mul2) * dL_dmean2D[idx].y;
dL_dmean.y = (proj[4] * m_w - proj[7] * mul1) * dL_dmean2D[idx].x + (proj[5] * m_w - proj[7] * mul2) * dL_dmean2D[idx].y;
dL_dmean.z = (proj[8] * m_w - proj[11] * mul1) * dL_dmean2D[idx].x + (proj[9] * m_w - proj[11] * mul2) * dL_dmean2D[idx].y;
// That's the second part of the mean gradient. Previous computation
// of cov2D and following SH conversion also affects it.
dL_dmeans[idx] += dL_dmean;
// Compute gradient updates due to computing colors from SHs
if (shs)
computeColorFromSH(idx, D, M, (glm::vec3*)means, *campos, shs, clamped, (glm::vec3*)dL_dcolor, (glm::vec3*)dL_dmeans, (glm::vec3*)dL_dsh);
// Compute gradient updates due to computing covariance from scale/rotation
if (scales)
computeCov3D(idx, scales[idx], scale_modifier, rotations[idx], dL_dcov3D, dL_dscale, dL_drot);
}
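The `dL_dmean` block is the derivative of the perspective division. With homogeneous coordinates $m_{hom} = (x_h, y_h, z_h, w_h)^\top = P\,(m_x, m_y, m_z, 1)^\top$ and screen-space mean $(u, v) = (x_h / w_h,\ y_h / w_h)$, the quotient rule gives, for example for $m_x$ (remember `proj` is stored column-major, so $\partial x_h / \partial m_x = \texttt{proj[0]}$ and $\partial w_h / \partial m_x = \texttt{proj[3]}$):
$$\frac{\partial u}{\partial m_x} = \frac{\texttt{proj[0]}\,w_h - x_h\,\texttt{proj[3]}}{w_h^2} = \texttt{proj[0]}\,\frac{1}{w_h} - \texttt{proj[3]}\,\frac{x_h}{w_h^2},\qquad \frac{\partial v}{\partial m_x} = \texttt{proj[1]}\,\frac{1}{w_h} - \texttt{proj[3]}\,\frac{y_h}{w_h^2}$$
`mul1` and `mul2` are precisely $x_h / w_h^2$ and $y_h / w_h^2$, and the $m_y$, $m_z$ terms use `proj[4..7]` and `proj[8..11]` in the same pattern. Note the `+=` when writing `dL_dmeans[idx]`: `computeCov2DCUDA` already deposited the covariance path's contribution, and `computeColorFromSH` adds the view-direction path's contribution afterwards.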
# A closer look at computeCov3D
// Backward pass for the conversion of scale and rotation to a
// 3D covariance matrix for each Gaussian.
__device__ void computeCov3D(int idx, const glm::vec3 scale, float mod, const glm::vec4 rot, const float* dL_dcov3Ds, glm::vec3* dL_dscales, glm::vec4* dL_drots)
{
// Recompute (intermediate) results for the 3D covariance computation.
glm::vec4 q = rot;// / glm::length(rot);
float r = q.x;
float x = q.y;
float y = q.z;
float z = q.w;
glm::mat3 R = glm::mat3(
1.f - 2.f * (y * y + z * z), 2.f * (x * y - r * z), 2.f * (x * z + r * y),
2.f * (x * y + r * z), 1.f - 2.f * (x * x + z * z), 2.f * (y * z - r * x),
2.f * (x * z - r * y), 2.f * (y * z + r * x), 1.f - 2.f * (x * x + y * y)
);
glm::mat3 S = glm::mat3(1.0f);
glm::vec3 s = mod * scale;
S[0][0] = s.x;
S[1][1] = s.y;
S[2][2] = s.z;
glm::mat3 M = S * R;
const float* dL_dcov3D = dL_dcov3Ds + 6 * idx;
glm::vec3 dunc(dL_dcov3D[0], dL_dcov3D[3], dL_dcov3D[5]);
glm::vec3 ounc = 0.5f * glm::vec3(dL_dcov3D[1], dL_dcov3D[2], dL_dcov3D[4]);
// Convert per-element covariance loss gradients to matrix form
glm::mat3 dL_dSigma = glm::mat3(
dL_dcov3D[0], 0.5f * dL_dcov3D[1], 0.5f * dL_dcov3D[2],
0.5f * dL_dcov3D[1], dL_dcov3D[3], 0.5f * dL_dcov3D[4],
0.5f * dL_dcov3D[2], 0.5f * dL_dcov3D[4], dL_dcov3D[5]
);
// Compute loss gradient w.r.t. matrix M
// dSigma_dM = 2 * M
glm::mat3 dL_dM = 2.0f * M * dL_dSigma;
glm::mat3 Rt = glm::transpose(R);
glm::mat3 dL_dMt = glm::transpose(dL_dM);
// Gradients of loss w.r.t. scale
glm::vec3* dL_dscale = dL_dscales + idx;
dL_dscale->x = glm::dot(Rt[0], dL_dMt[0]);
dL_dscale->y = glm::dot(Rt[1], dL_dMt[1]);
dL_dscale->z = glm::dot(Rt[2], dL_dMt[2]);
dL_dMt[0] *= s.x;
dL_dMt[1] *= s.y;
dL_dMt[2] *= s.z;
// Gradients of loss w.r.t. normalized quaternion
glm::vec4 dL_dq;
dL_dq.x = 2 * z * (dL_dMt[0][1] - dL_dMt[1][0]) + 2 * y * (dL_dMt[2][0] - dL_dMt[0][2]) + 2 * x * (dL_dMt[1][2] - dL_dMt[2][1]);
dL_dq.y = 2 * y * (dL_dMt[1][0] + dL_dMt[0][1]) + 2 * z * (dL_dMt[2][0] + dL_dMt[0][2]) + 2 * r * (dL_dMt[1][2] - dL_dMt[2][1]) - 4 * x * (dL_dMt[2][2] + dL_dMt[1][1]);
dL_dq.z = 2 * x * (dL_dMt[1][0] + dL_dMt[0][1]) + 2 * r * (dL_dMt[2][0] - dL_dMt[0][2]) + 2 * z * (dL_dMt[1][2] + dL_dMt[2][1]) - 4 * y * (dL_dMt[2][2] + dL_dMt[0][0]);
dL_dq.w = 2 * r * (dL_dMt[0][1] - dL_dMt[1][0]) + 2 * x * (dL_dMt[2][0] + dL_dMt[0][2]) + 2 * y * (dL_dMt[1][2] + dL_dMt[2][1]) - 4 * z * (dL_dMt[1][1] + dL_dMt[0][0]);
// Gradients of loss w.r.t. unnormalized quaternion
float4* dL_drot = (float4*)(dL_drots + idx);
*dL_drot = float4{ dL_dq.x, dL_dq.y, dL_dq.z, dL_dq.w };//dnormvdv(float4{ rot.x, rot.y, rot.z, rot.w }, float4{ dL_dq.x, dL_dq.y, dL_dq.z, dL_dq.w });
}
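A few identities behind this code. The forward pass builds $M = SR$ and $\Sigma = M^\top M$; for a symmetric upstream gradient $\frac{\partial L}{\partial \Sigma}$ this yields the `dL_dM` line, and since $S$ is diagonal, the scale and rotation gradients fall out entry by entry:
$$\Sigma = M^\top M \;\Rightarrow\; \frac{\partial L}{\partial M} = M\left(\frac{\partial L}{\partial \Sigma} + \frac{\partial L}{\partial \Sigma}^{\!\top}\right) = 2\,M\,\frac{\partial L}{\partial \Sigma}$$
$$M = SR,\; M_{ij} = s_i R_{ij} \;\Rightarrow\; \frac{\partial L}{\partial s_i} = \sum_j R_{ij}\,\frac{\partial L}{\partial M_{ij}},\qquad \frac{\partial L}{\partial R_{ij}} = s_i\,\frac{\partial L}{\partial M_{ij}}$$
The three `dL_dscale` dot products implement the sum over $j$, the three `dL_dMt` scaling lines turn $\frac{\partial L}{\partial M}$ into $\frac{\partial L}{\partial R}$, and the `dL_dq` block is the chain rule through the quaternion-to-rotation formula. Note that the normalization of `rot` is commented out (both at the top and in the final `dnormvdv` call), so the returned gradient treats the quaternion as already normalized.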