mirror of
https://github.com/tu-darmstadt-informatik/bsc-thesis.git
synced 2025-12-12 17:35:51 +00:00
pre-final
This commit is contained in:
parent
b895fa9f18
commit
d0d7e6b176
@ -33,13 +33,10 @@ and compose a dense optical flow field based on instance-level and global motion
|
||||
predictions. We train our network on the synthetic Virtual KITTI dataset,
|
||||
which provides ground truth for all components of our system.
|
||||
|
||||
\end{abstract}
|
||||
|
||||
\renewcommand{\abstractname}{Zusammenfassung}
|
||||
\begin{abstract}
|
||||
\subsection*{Zusammenfassung}
|
||||
|
||||
Mit dem Aufkommen von Deep Learning
|
||||
ist die Umfunktionierung generischer Deep Networks ein
|
||||
ist das Umfunktionieren generischer Deep Networks ein
|
||||
beliebter Ansatz für klassische Probleme der Computer Vision geworden,
|
||||
die pixelweise Schätzung erfordern.
|
||||
|
||||
|
||||
@ -255,7 +255,7 @@ performs better in our case than the standard $\ell_1$-loss.
|
||||
We thus compute the RoI motion loss as
|
||||
|
||||
\begin{equation}
|
||||
\text{L}_{motion} = \frac{1}{\text{N}_{RoI}^{\mathit{fg}}} \sum_k^{\text{N}_{RoI}} l_{p}^k + (l_{R}^k + l_{t}^k) \cdot o_k^* + l_o^k,
|
||||
L_{motion} = \frac{1}{\text{N}_{RoI}^{\mathit{fg}}} \sum_k^{\text{N}_{RoI}} l_{p}^k + (l_{R}^k + l_{t}^k) \cdot o_k^* + l_o^k,
|
||||
\end{equation}
|
||||
where
|
||||
\begin{equation}
|
||||
@ -284,7 +284,7 @@ other than $c_k^*$ are not penalized.
|
||||
|
||||
Now, our modified RoI loss is
|
||||
\begin{equation}
|
||||
\text{L}_{RoI} = \text{L}_{cls} + \text{L}_{box} + \text{L}_{mask} + \text{L}_{motion}.
|
||||
L_{RoI} = L_{cls} + L_{box} + L_{mask} + L_{motion}.
|
||||
\end{equation}
|
||||
|
||||
\paragraph{Camera motion supervision}
|
||||
@ -339,7 +339,7 @@ full image resolution, as
|
||||
the depth crops and 2D point grid are at the same resolution as the predicted
|
||||
$m \times m$ mask.
|
||||
|
||||
For each RoI, we can now compute $\text{L}_{RoI}$ and thus supervise the object motion
|
||||
For each RoI, we can now compute $L_{RoI}$ and thus supervise the object motion
|
||||
by penalizing the $m \times m$ optical flow grid.
|
||||
If there is optical flow ground truth available, we can use the RoI bounding box to
|
||||
crop and resize a region from the ground truth optical flow to match the RoI's
|
||||
@ -377,7 +377,7 @@ Again, as for masks and bounding boxes in Mask R-CNN,
|
||||
the predicted output object motions are the predicted object motions for the
|
||||
highest scoring class.
|
||||
|
||||
\subsection{Dense flow from motion}
|
||||
\subsection{Dense flow from 3D motion}
|
||||
\label{ssec:postprocessing}
|
||||
As a postprocessing, we compose the dense optical flow between $I_t$ and $I_{t+1}$ from the outputs of our Motion R-CNN network.
|
||||
Given the depth map $d_t$ for frame $I_t$, we first create a 3D point cloud in camera space at time $t$,
|
||||
|
||||
@ -594,14 +594,14 @@ it is negative, let $s_i$ be the predicted objectness score and $b_i$, $b_i^*$ t
|
||||
predicted and ground truth bounding box encodings.
|
||||
Then, the RPN loss is computed as
|
||||
\begin{equation}
|
||||
\text{L}_{RPN} = \text{L}_{obj} + \text{L}_{box}^{RPN},
|
||||
L_{RPN} = L_{obj} + L_{box}^{RPN},
|
||||
\end{equation}
|
||||
where
|
||||
\begin{equation}
|
||||
\text{L}_{obj} = \frac{1}{\text{N}_{RPN}} \sum_{i=1}^{\text{N}_{RPN}} \ell_{cls}(s_i, s_i^*),
|
||||
L_{obj} = \frac{1}{\text{N}_{RPN}} \sum_{i=1}^{\text{N}_{RPN}} \ell_{cls}(s_i, s_i^*),
|
||||
\end{equation}
|
||||
\begin{equation}
|
||||
\text{L}_{box}^{RPN} = \frac{1}{\text{N}_{RPN}^{pos}} \sum_{i=1}^{\text{N}_{RPN}} s_i^* \cdot \ell_{reg}(b_i^* - b_i),
|
||||
L_{box}^{RPN} = \frac{1}{\text{N}_{RPN}^{pos}} \sum_{i=1}^{\text{N}_{RPN}} s_i^* \cdot \ell_{reg}(b_i^* - b_i),
|
||||
\end{equation}
|
||||
and
|
||||
\begin{equation}
|
||||
@ -630,19 +630,19 @@ In our implementation, we use nearest neighbour resizing for resizing the mask
|
||||
targets.
|
||||
Then, the ROI loss is computed as
|
||||
\begin{equation}
|
||||
\text{L}_{RoI} = \text{L}_{cls} + \text{L}_{box} + \text{L}_{mask}
|
||||
L_{RoI} = L_{cls} + L_{box} + L_{mask}
|
||||
\end{equation}
|
||||
where
|
||||
\begin{equation}
|
||||
\text{L}_{cls} = \frac{1}{\text{N}_{RoI}} \sum_{i=1}^{\text{N}_{RoI}} \ell_{cls}(c_i, c_i^*),
|
||||
L_{cls} = \frac{1}{\text{N}_{RoI}} \sum_{i=1}^{\text{N}_{RoI}} \ell_{cls}(c_i, c_i^*),
|
||||
\end{equation}
|
||||
is the average cross-entropy classification loss,
|
||||
\begin{equation}
|
||||
\text{L}_{box} = \frac{1}{\text{N}_{RoI}^{\mathit{fg}}} \sum_{i=1}^{\text{N}_{RoI}} [c_i^* \geq 1] \cdot \ell_{reg}(b_i^* - b_i)
|
||||
L_{box} = \frac{1}{\text{N}_{RoI}^{\mathit{fg}}} \sum_{i=1}^{\text{N}_{RoI}} [c_i^* \geq 1] \cdot \ell_{reg}(b_i^* - b_i)
|
||||
\end{equation}
|
||||
is the average smooth-$\ell_1$ bounding box regression loss,
|
||||
\begin{equation}
|
||||
\text{L}_{mask} = \frac{1}{\text{N}_{RoI}^{\mathit{fg}}} \sum_{i=1}^{\text{N}_{RoI}} [c_i^* \geq 1] \cdot \ell_{cls*}(m_i,m_i^*)
|
||||
L_{mask} = \frac{1}{\text{N}_{RoI}^{\mathit{fg}}} \sum_{i=1}^{\text{N}_{RoI}} [c_i^* \geq 1] \cdot \ell_{cls*}(m_i,m_i^*)
|
||||
\end{equation}
|
||||
is the average binary cross-entropy mask loss,
|
||||
\begin{equation}
|
||||
|
||||
@ -1,5 +0,0 @@
|
||||
[0] Config.pm:343> INFO - This is Biber 2.5
|
||||
[0] Config.pm:346> INFO - Logfile is 'thesis.aux.blg'
|
||||
[36] biber:290> INFO - === So Okt 29, 2017, 10:29:33
|
||||
[108] Utils.pm:165> ERROR - Cannot find control file 'thesis.aux.bcf'! - did you pass the "backend=biber" option to BibLaTeX?
|
||||
[108] Biber.pm:113> INFO - ERRORS: 1
|
||||
29
thesis.tex
29
thesis.tex
@ -3,14 +3,15 @@
|
||||
11pt, % Schriftgröße
|
||||
bibliography=totoc, % Bibliografie automatisch im Inhaltsverzeichnis generieren
|
||||
parskip=off, % Absatzabstand: off, half, full
|
||||
oneside, % einseitiges Layout
|
||||
% twoside, % zweiseitiges Layout
|
||||
% oneside, % einseitiges Layout
|
||||
twoside, % zweiseitiges Layout
|
||||
article, % verwendet KOMA-Klasse scrartcl
|
||||
% longdoc=true,
|
||||
accentcolor=tud2b,
|
||||
accentcolor=tud1b,
|
||||
% colorbacktitle, % Hintergrund des Titels einfärben
|
||||
colorback, % Hintergrund unter dem Titel einfärben
|
||||
type=bsc, % für Bachelorarbeit
|
||||
type=bsc, % für Bachelorarbeit,
|
||||
openright=true
|
||||
]{tudthesis}
|
||||
|
||||
\usepackage[a-1b]{pdfx}
|
||||
@ -37,7 +38,11 @@
|
||||
% Leider funktioniert hyperref nicht 100%-ig einwandfrei mit tudstyle; bei Problemen damit lieber aus dem Dokument entfernen!
|
||||
% Einstellungen für hyperref
|
||||
\hypersetup{%
|
||||
pdftitle=Motion R-CNN,
|
||||
pdfinfo={
|
||||
Title={Motion R-CNN: Instance-level 3D Motion Estimation with Region-based CNNs},
|
||||
Author={Simon Meister},
|
||||
}
|
||||
pdftitle={Motion R-CNN: Instance-level 3D Motion Estimation with Region-based CNNs},
|
||||
pdfauthor={Simon Meister},
|
||||
pdfsubject={B.Sc. Thesis},
|
||||
unicode=true, % benötigt, damit Umlaute im pdftitle richtig dargestellt werden
|
||||
@ -48,6 +53,11 @@
|
||||
%% citecolor=NavyBlue%DeepPink3
|
||||
}
|
||||
|
||||
\pdfinfo{
|
||||
/Title (Motion R-CNN: Instance-level 3D Motion Estimation with Region-based CNNs)
|
||||
/Author (Simon Meister)
|
||||
}
|
||||
|
||||
\usepackage[
|
||||
backend=biber, % biber ist das Standard-Backend für Biblatex. Für die Abwärtskompatibilität kann hier auch bibtex oder bibtex8 gewählt werden (siehe biblatex-Dokumentation)
|
||||
style=numeric, %numeric, authortitle, alphabetic etc.
|
||||
@ -108,23 +118,26 @@
|
||||
%\affidavit{\myname}
|
||||
|
||||
\pagestyle{myheadings} % Seitenstil umschalten
|
||||
\pagenumbering{gobble}
|
||||
%\pagenumbering{Roman} % Seitennummerierung auf römische Zahlen ändern
|
||||
%\setcounter{page}{1}
|
||||
%\mymarkright{Version: \today} % Inhalt der Fußzeile
|
||||
|
||||
|
||||
\input{abstract}
|
||||
\clearpage
|
||||
|
||||
%\pagenumbering{Roman} % Seitennummerierung auf römische Zahlen ändern
|
||||
\setcounter{page}{1}
|
||||
\pagenumbering{arabic} % Arabische Seitenzahlen
|
||||
|
||||
\setcounter{tocdepth}{3}
|
||||
\tableofcontents
|
||||
\setcounter{page}{1}
|
||||
\clearpage
|
||||
|
||||
% Aktuelle Seitenzahl speichern, da Wechsel auf arabische Zahlen die Zählung zurücksetzt
|
||||
%% \newcounter{savedromanpagenumber}
|
||||
%% \setcounter{savedromanpagenumber}{\value{page}}
|
||||
%\pagenumbering{arabic} % Arabische Seitenzahlen
|
||||
|
||||
|
||||
\section{Introduction}
|
||||
\label{sec:introduction}
|
||||
|
||||
112
thesis.xmpdata
112
thesis.xmpdata
@ -1,112 +0,0 @@
|
||||
% Replace the following information with your document's actual
|
||||
% metadata. If you do not want to set a value for a certain parameter,
|
||||
% just omit it.
|
||||
%
|
||||
% Symbols permitted in metadata
|
||||
% =============================
|
||||
%
|
||||
% Within the metadata, all printable ASCII characters except
|
||||
% '\', '{', '}', and '%' represent themselves. Also, all printable
|
||||
% Unicode characters from the basic multilingual plane (i.e., up to
|
||||
% code point U+FFFF) can be used directly with the UTF-8 encoding.
|
||||
% Consecutive whitespace characters are combined into a single
|
||||
% space. Whitespace after a macro such as \copyright, \backslash, or
|
||||
% \sep is ignored. Blank lines are not permitted. Moreover, the
|
||||
% following markup can be used:
|
||||
%
|
||||
% '\ ' - a literal space (for example after a macro)
|
||||
% \% - a literal '%'
|
||||
% \{ - a literal '{'
|
||||
% \} - a literal '}'
|
||||
% \backslash - a literal '\'
|
||||
% \copyright - the (c) copyright symbol
|
||||
%
|
||||
% The macro \sep is only permitted within \Author, \Keywords, and
|
||||
% \Org. It is used to separate multiple authors, keywords, etc.
|
||||
%
|
||||
% List of supported metadata fields
|
||||
% =================================
|
||||
%
|
||||
% Here is a complete list of user-definable metadata fields currently
|
||||
% supported, and their meanings. More may be added in the future.
|
||||
%
|
||||
% General information:
|
||||
%
|
||||
% \Author - the document's human author. Separate multiple
|
||||
% authors with \sep.
|
||||
% \Title - the document's title.
|
||||
% \Keywords - list of keywords, separated with \sep.
|
||||
% \Subject - the abstract.
|
||||
% \Org - publishers.
|
||||
%
|
||||
% Copyright information:
|
||||
%
|
||||
% \Copyright - a copyright statement.
|
||||
% \CopyrightURL - location of a web page describing the owner
|
||||
% and/or rights statement for this document.
|
||||
% \Copyrighted - 'True' if the document is copyrighted, and
|
||||
% 'False' if it isn't. This is automatically set
|
||||
% to 'True' if either \Copyright or \CopyrightURL
|
||||
% is specified, but can be overridden. For
|
||||
% example, if the copyright statement is "Public
|
||||
% Domain", this should be set to 'False'.
|
||||
%
|
||||
% Publication information:
|
||||
%
|
||||
% \PublicationType - The type of publication. If defined, must be
|
||||
% one of book, catalog, feed, journal, magazine,
|
||||
% manual, newsletter, pamphlet. This is
|
||||
% automatically set to "journal" if \Journaltitle
|
||||
% is specified, but can be overridden.
|
||||
% \Journaltitle - The title of the journal in which the document
|
||||
% was published.
|
||||
% \Journalnumber - The ISSN for the publication in which the
|
||||
% document was published.
|
||||
% \Volume - Journal volume.
|
||||
% \Issue - Journal issue/number.
|
||||
% \Firstpage - First page number of the published version of
|
||||
% the document.
|
||||
% \Lastpage - Last page number of the published version of
|
||||
% the document.
|
||||
% \Doi - Digital Object Identifier (DOI) for the
|
||||
% document, without the leading "doi:".
|
||||
% \CoverDisplayDate - Date on the cover of the journal issue, as a
|
||||
% human-readable text string.
|
||||
% \CoverDate - Date on the cover of the journal issue, in a
|
||||
% format suitable for storing in a database field
|
||||
% with a 'date' data type.
|
||||
|
||||
|
||||
|
||||
\Title{Motion R-CNN: Instance-level 3D Motion Estimation with Region-based CNNs}
|
||||
|
||||
\Author{Simon Meister}
|
||||
|
||||
\Copyright{Copyright \copyright\ 2017 Simon Meister}
|
||||
|
||||
\Keywords{optical flow\sep
|
||||
instance segmentation\sep
|
||||
deep learning}
|
||||
|
||||
\Subject{With the advent of deep learning, it has become popular to re-purpose generic deep networks for classical
|
||||
computer vision problems involving pixel-wise estimation.
|
||||
Following this trend, many recent end-to-end deep learning approaches to optical flow and scene flow
|
||||
predict complete, high resolution flow fields with a generic network for dense, pixel-wise prediction,
|
||||
thereby ignoring the inherent structure of the underlying motion estimation problem and any physical
|
||||
constraints within the scene.
|
||||
We introduce a scalable end-to-end deep learning approach for dense motion estimation that respects the
|
||||
structure of the scene as being composed of distinct objects, thus combining the representation learning
|
||||
benefits and speed of end-to-end deep networks with a physically plausible scene model inspired by
|
||||
slanted plane energy-minimization approaches to scene flow.
|
||||
Building on recent advances in region-based convolutional networks (R-CNNs), we integrate motion
|
||||
estimation with instance segmentation. Given two consecutive frames from a monocular RGB-D camera,
|
||||
our resulting end-to-end deep network detects objects with precise per-pixel object masks and estimates
|
||||
the 3D motion of each detected object between the frames. By additionally estimating a global camera
|
||||
motion in the same network, we compose a dense optical flow field based on instance-level and global
|
||||
motion predictions. We train our network on the synthetic Virtual KITTI dataset, which provides ground
|
||||
truth for all components of our system.}
|
||||
|
||||
\setRGBcolorprofile{sRGB_IEC61966-2-1_black_scaled.icc}
|
||||
{sRGB_IEC61966-2-1_black_scaled}
|
||||
{sRGB IEC61966 v2.1 with black scaling}
|
||||
{http://www.color.org}
|
||||
Loading…
x
Reference in New Issue
Block a user