From c157e9e1ddadfbf8f980544c73d9a2dcd8c15d91 Mon Sep 17 00:00:00 2001
From: Simon Meister
Date: Tue, 14 Nov 2017 19:57:58 +0100
Subject: [PATCH] WIP

---
 approach.tex    | 4 ++--
 background.tex  | 6 +++---
 experiments.tex | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/approach.tex b/approach.tex
index 8afdbb8..0fab62d 100644
--- a/approach.tex
+++ b/approach.tex
@@ -196,7 +196,7 @@ predict $\sin(\alpha)$, $\sin(\beta)$, $\sin(\gamma)$ and $t_t^{cam}$ in the sam
 Again, we predict a softmax score $o_t^{cam}$ for differentiating between a still
 and moving camera.
 
-\subsection{Motion R-CNN network design}
+\subsection{Network design}
 \label{ssec:design}
 
 \paragraph{Camera motion network}
@@ -327,7 +327,7 @@ loss could benefit motion regression by removing any loss balancing issues betwe
 rotation, translation and pivot terms \cite{PoseNet2}, which can make it
 interesting even when 3D motion ground truth is available.
 
-\subsection{Training and Inference}
+\subsection{Training and inference}
 \label{ssec:training_inference}
 \paragraph{Training}
 We train the Motion R-CNN RPN and RoI heads in the exact same way as described for Mask R-CNN.
diff --git a/background.tex b/background.tex
index 1562e5d..44b0e7d 100644
--- a/background.tex
+++ b/background.tex
@@ -16,7 +16,7 @@ requires estimating depth for each pixel. Generally, stereo input is used for sc
 to estimate disparity-based depth, however monocular depth estimation
 with deep networks is becoming popular \cite{DeeperDepth}.
 
-\subsection{Convolutional neural networks for dense motion estimation}
+\subsection{CNNs for dense motion estimation}
 Deep convolutional neural network (CNN) architectures \cite{ImageNetCNN, VGGNet, ResNet}
 became widely popular through numerous successes in classification and
 recognition tasks.
@@ -210,7 +210,7 @@ Figure taken from \cite{ResNet}.
 \label{figure:bottleneck}
 \end{figure}
 
-\subsection{Region-based convolutional networks}
+\subsection{Region-based CNNs}
 \label{ssec:rcnn}
 We now give an overview of region-based convolutional networks, which are currently
 by far the most popular deep networks for object detection, and have recently also been applied to instance segmentation.
@@ -439,7 +439,7 @@ Figure taken from \cite{FPN}.
 \label{figure:fpn_block}
 \end{figure}
 
-\subsection{Mask R-CNN: Training and Inference}
+\subsection{Mask R-CNN: Training and inference}
 \paragraph{Loss definitions}
 For regression, we define the smooth $\ell_1$ regression loss as
 \begin{equation}
diff --git a/experiments.tex b/experiments.tex
index 9e82b24..6589aa9 100644
--- a/experiments.tex
+++ b/experiments.tex
@@ -147,7 +147,7 @@ fn = \sum_k [o^{k,c_k} = 0 \land o^{gt,i_k} = 1].
 Analogously, we define error metrics $E_{R}^{cam}$ and $E_{t}^{cam}$
 for predicted camera motions.
 
-\subsection{Virtual KITTI training setup}
+\subsection{Virtual KITTI: Training setup}
 \label{ssec:setup}
 
 For our initial experiments, we concatenate both RGB frames as
@@ -178,7 +178,7 @@ Note that a larger weight prevented the angle sine estimates from properly
 converging to the very small values they are in general
 expected to output.
 
-\subsection{Virtual KITTI evaluation}
+\subsection{Virtual KITTI: Evaluation}
 \label{ssec:vkitti}
 
 \begin{figure}[t]