From 183dc7266911eb439ad2927c1825eee95230cb58 Mon Sep 17 00:00:00 2001 From: Simon Meister Date: Tue, 14 Nov 2017 19:51:05 +0100 Subject: [PATCH] WIP --- abstract.tex | 6 +++--- background.tex | 10 +++++----- introduction.tex | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/abstract.tex b/abstract.tex index fb37a8d..49a0bf0 100644 --- a/abstract.tex +++ b/abstract.tex @@ -63,10 +63,10 @@ Bei Eingabe von zwei aufeinanderfolgenden frames aus einer monokularen RGB-D Kamera erkennt unser end-to-end Deep Network Objekte mit pixelgenauen Objektmasken und schätzt die 3D-Bewegung jedes erkannten Objekts zwischen den frames ab. Indem wir zusätzlich im selben Netzwerk die globale Kamerabewegung schätzen, -setzen wir aus den instanzbasierten und globalen Bewegungsschätzungen dichten -optischen Fluss zusammen. +setzen wir aus den instanzbasierten und globalen Bewegungsschätzungen ein dichtes +optisches Flussfeld zusammen. Wir trainieren unser Netzwerk auf dem synthetischen Virtual KITTI Datensatz, -der ground truth für alle Komponenten unseres Systems bereitstellt. +der Ground Truth für alle Komponenten unseres Systems bereitstellt. \end{abstract} diff --git a/background.tex b/background.tex index 6787544..1562e5d 100644 --- a/background.tex +++ b/background.tex @@ -205,7 +205,7 @@ Batch normalization \cite{BN} is used after every convolution. \caption{ ResNet \cite{ResNet} \enquote{bottleneck} convolutional block introduced to reduce computational complexity in deeper network variants, shown here with 256 input and output channels. -Figure from \cite{ResNet}. +Figure taken from \cite{ResNet}. } \label{figure:bottleneck} \end{figure} @@ -278,10 +278,10 @@ The basic Mask R-CNN ResNet-50 architecture is shown in Table \ref{table:maskrcn Note that the per-class masks logits are put through a sigmoid layer, and thus there is no comptetition between classes for the mask prediction branch. 
-One important technical aspect of Mask R-CNN is the replacement of RoI pooling with +Another important technical aspect of Mask R-CNN is the replacement of RoI pooling with bilinear sampling for extracting the RoI features, which is much more precise. -%In RoI pooling, at the borders, the bins for max-pooling are not aligned with the actual pixel -%boundary of the bounding box. +In the original RoI pooling from Fast R-CNN, the bins for max-pooling are not aligned with the actual pixel +boundary of the bounding box, and thus some detail is lost. { @@ -434,7 +434,7 @@ block (see Figure \ref{figure:fpn_block}). FPN block from \cite{FPN}. Lower resolution features coming from the bottleneck are bilinearly upsampled and added with higher resolution skip connections from the encoder. -Figure from \cite{FPN}. +Figure taken from \cite{FPN}. } \label{figure:fpn_block} \end{figure} diff --git a/introduction.tex b/introduction.tex index 1af994d..3b93756 100644 --- a/introduction.tex +++ b/introduction.tex @@ -48,9 +48,9 @@ often fails to properly segment the pixels into the correct masks or assigns bac \includegraphics[width=\textwidth]{figures/sfmnet_kitti} \caption{ Results of SfM-Net \cite{SfmNet} on KITTI \cite{KITTI2015}. -From left to right we show, instance segmentation into up to 3 independent objects, +From left to right, we show their instance segmentation into up to 3 independent objects, ground truth instance masks for the segmented objects, composed optical flow and ground truth optical flow. -Figure from \cite{SfmNet}. +Figure taken from \cite{SfmNet}. } \label{figure:sfmnet_kitti} \end{figure} @@ -67,7 +67,7 @@ and predicts pixel-precise segmentation masks for each detected object (Figure \ \includegraphics[width=\textwidth]{figures/maskrcnn_cs} \caption{ Instance segmentation results of Mask R-CNN ResNet-50-FPN \cite{MaskRCNN} -on Cityscapes \cite{Cityscapes}. Figure from \cite{MaskRCNN} +on Cityscapes \cite{Cityscapes}. 
Figure taken from \cite{MaskRCNN}. } \label{figure:maskrcnn_cs} \end{figure}