pre-final

This commit is contained in:
Simon Meister 2017-11-22 15:50:35 +01:00
parent b895fa9f18
commit d0d7e6b176
7 changed files with 34 additions and 141 deletions

View File

@ -33,13 +33,10 @@ and compose a dense optical flow field based on instance-level and global motion
predictions. We train our network on the synthetic Virtual KITTI dataset,
which provides ground truth for all components of our system.
\end{abstract}
\renewcommand{\abstractname}{Zusammenfassung}
\begin{abstract}
\subsection*{Zusammenfassung}
Mit dem Aufkommen von Deep Learning
ist die Umfunktionierung generischer Deep Networks ein
ist das Umfunktionieren generischer Deep Networks ein
beliebter Ansatz für klassische Probleme der Computer Vision geworden,
die pixelweise Schätzung erfordern.

View File

@ -255,7 +255,7 @@ performs better in our case than the standard $\ell_1$-loss.
We thus compute the RoI motion loss as
\begin{equation}
\text{L}_{motion} = \frac{1}{\text{N}_{RoI}^{\mathit{fg}}} \sum_k^{\text{N}_{RoI}} l_{p}^k + (l_{R}^k + l_{t}^k) \cdot o_k^* + l_o^k,
L_{motion} = \frac{1}{\text{N}_{RoI}^{\mathit{fg}}} \sum_k^{\text{N}_{RoI}} l_{p}^k + (l_{R}^k + l_{t}^k) \cdot o_k^* + l_o^k,
\end{equation}
where
\begin{equation}
@ -284,7 +284,7 @@ other than $c_k^*$ are not penalized.
Now, our modified RoI loss is
\begin{equation}
\text{L}_{RoI} = \text{L}_{cls} + \text{L}_{box} + \text{L}_{mask} + \text{L}_{motion}.
L_{RoI} = L_{cls} + L_{box} + L_{mask} + L_{motion}.
\end{equation}
\paragraph{Camera motion supervision}
@ -339,7 +339,7 @@ full image resolution, as
the depth crops and 2D point grid are at the same resolution as the predicted
$m \times m$ mask.
For each RoI, we can now compute $\text{L}_{RoI}$ and thus supervise the object motion
For each RoI, we can now compute $L_{RoI}$ and thus supervise the object motion
by penalizing the $m \times m$ optical flow grid.
If there is optical flow ground truth available, we can use the RoI bounding box to
crop and resize a region from the ground truth optical flow to match the RoI's
@ -377,7 +377,7 @@ Again, as for masks and bounding boxes in Mask R-CNN,
the predicted output object motions are the predicted object motions for the
highest scoring class.
\subsection{Dense flow from motion}
\subsection{Dense flow from 3D motion}
\label{ssec:postprocessing}
As a postprocessing, we compose the dense optical flow between $I_t$ and $I_{t+1}$ from the outputs of our Motion R-CNN network.
Given the depth map $d_t$ for frame $I_t$, we first create a 3D point cloud in camera space at time $t$,

View File

@ -594,14 +594,14 @@ it is negative, let $s_i$ be the predicted objectness score and $b_i$, $b_i^*$ t
predicted and ground truth bounding box encodings.
Then, the RPN loss is computed as
\begin{equation}
\text{L}_{RPN} = \text{L}_{obj} + \text{L}_{box}^{RPN},
L_{RPN} = L_{obj} + L_{box}^{RPN},
\end{equation}
where
\begin{equation}
\text{L}_{obj} = \frac{1}{\text{N}_{RPN}} \sum_{i=1}^{\text{N}_{RPN}} \ell_{cls}(s_i, s_i^*),
L_{obj} = \frac{1}{\text{N}_{RPN}} \sum_{i=1}^{\text{N}_{RPN}} \ell_{cls}(s_i, s_i^*),
\end{equation}
\begin{equation}
\text{L}_{box}^{RPN} = \frac{1}{\text{N}_{RPN}^{pos}} \sum_{i=1}^{\text{N}_{RPN}} s_i^* \cdot \ell_{reg}(b_i^* - b_i),
L_{box}^{RPN} = \frac{1}{\text{N}_{RPN}^{pos}} \sum_{i=1}^{\text{N}_{RPN}} s_i^* \cdot \ell_{reg}(b_i^* - b_i),
\end{equation}
and
\begin{equation}
@ -630,19 +630,19 @@ In our implementation, we use nearest neighbour resizing for resizing the mask
targets.
Then, the ROI loss is computed as
\begin{equation}
\text{L}_{RoI} = \text{L}_{cls} + \text{L}_{box} + \text{L}_{mask}
L_{RoI} = L_{cls} + L_{box} + L_{mask}
\end{equation}
where
\begin{equation}
\text{L}_{cls} = \frac{1}{\text{N}_{RoI}} \sum_{i=1}^{\text{N}_{RoI}} \ell_{cls}(c_i, c_i^*),
L_{cls} = \frac{1}{\text{N}_{RoI}} \sum_{i=1}^{\text{N}_{RoI}} \ell_{cls}(c_i, c_i^*),
\end{equation}
is the average cross-entropy classification loss,
\begin{equation}
\text{L}_{box} = \frac{1}{\text{N}_{RoI}^{\mathit{fg}}} \sum_{i=1}^{\text{N}_{RoI}} [c_i^* \geq 1] \cdot \ell_{reg}(b_i^* - b_i)
L_{box} = \frac{1}{\text{N}_{RoI}^{\mathit{fg}}} \sum_{i=1}^{\text{N}_{RoI}} [c_i^* \geq 1] \cdot \ell_{reg}(b_i^* - b_i)
\end{equation}
is the average smooth-$\ell_1$ bounding box regression loss,
\begin{equation}
\text{L}_{mask} = \frac{1}{\text{N}_{RoI}^{\mathit{fg}}} \sum_{i=1}^{\text{N}_{RoI}} [c_i^* \geq 1] \cdot \ell_{cls*}(m_i,m_i^*)
L_{mask} = \frac{1}{\text{N}_{RoI}^{\mathit{fg}}} \sum_{i=1}^{\text{N}_{RoI}} [c_i^* \geq 1] \cdot \ell_{cls*}(m_i,m_i^*)
\end{equation}
is the average binary cross-entropy mask loss,
\begin{equation}

View File

View File

@ -1,5 +0,0 @@
[0] Config.pm:343> INFO - This is Biber 2.5
[0] Config.pm:346> INFO - Logfile is 'thesis.aux.blg'
[36] biber:290> INFO - === So Okt 29, 2017, 10:29:33
[108] Utils.pm:165> ERROR - Cannot find control file 'thesis.aux.bcf'! - did you pass the "backend=biber" option to BibLaTeX?
[108] Biber.pm:113> INFO - ERRORS: 1

View File

@ -3,14 +3,15 @@
11pt, % Schriftgröße
bibliography=totoc, % Bibliografie automatisch im Inhaltsverzeichnis generieren
parskip=off, % Absatzabstand: off, half, full
oneside, % einseitiges Layout
% twoside, % zweiseitiges Layout
% oneside, % einseitiges Layout
twoside, % zweiseitiges Layout
article, % verwendet KOMA-Klasse scrartcl
% longdoc=true,
accentcolor=tud2b,
accentcolor=tud1b,
% colorbacktitle, % Hintergrund des Titels einfärben
colorback, % Hintergrund unter dem Titel einfärben
type=bsc, % für Bachelorarbeit
type=bsc, % für Bachelorarbeit,
openright=true
]{tudthesis}
\usepackage[a-1b]{pdfx}
@ -37,7 +38,11 @@
% Leider funktioniert hyperref nicht 100%-ig einwandfrei mit tudstyle; bei Problemen damit lieber aus dem Dokument entfernen!
% Einstellungen für hyperref
\hypersetup{%
pdftitle=Motion R-CNN,
pdfinfo={
Title={Motion R-CNN: Instance-level 3D Motion Estimation with Region-based CNNs},
Author={Simon Meister},
}
pdftitle={Motion R-CNN: Instance-level 3D Motion Estimation with Region-based CNNs},
pdfauthor={Simon Meister},
pdfsubject=B.Sc. Thesis,
unicode=true, % benötigt, damit Umlaute im pdftitle richtig dargestellt werden
@ -48,6 +53,11 @@
%% citecolor=NavyBlue%DeepPink3
}
\pdfinfo{
/Title (Motion R-CNN: Instance-level 3D Motion Estimation with Region-based CNNs)
/Author (Simon Meister)
}
\usepackage[
backend=biber, % biber ist das Standard-Backend für Biblatex. Für die Abwärtskompatibilität kann hier auch bibtex oder bibtex8 gewählt werden (siehe biblatex-Dokumentation)
style=numeric, %numeric, authortitle, alphabetic etc.
@ -108,23 +118,26 @@
%\affidavit{\myname}
\pagestyle{myheadings} % Seitenstil umschalten
\pagenumbering{gobble}
%\pagenumbering{Roman} % Seitennummerierung auf römische Zahlen ändern
%\setcounter{page}{1}
%\mymarkright{Version: \today} % Inhalt der Fußzeile
\input{abstract}
\clearpage
%\pagenumbering{Roman} % Seitennummerierung auf römische Zahlen ändern
\setcounter{page}{1}
\pagenumbering{arabic} % Arabische Seitenzahlen
\setcounter{tocdepth}{3}
\tableofcontents
\setcounter{page}{1}
\clearpage
% Aktuelle Seitenzahl speichern, da Wechsel auf arabische Zahlen die Zählung zurücksetzt
%% \newcounter{savedromanpagenumber}
%% \setcounter{savedromanpagenumber}{\value{page}}
%\pagenumbering{arabic} % Arabische Seitenzahlen
\section{Introduction}
\label{sec:introduction}

View File

@ -1,112 +0,0 @@
% Replace the following information with your document's actual
% metadata. If you do not want to set a value for a certain parameter,
% just omit it.
%
% Symbols permitted in metadata
% =============================
%
% Within the metadata, all printable ASCII characters except
% '\', '{', '}', and '%' represent themselves. Also, all printable
% Unicode characters from the basic multilingual plane (i.e., up to
% code point U+FFFF) can be used directly with the UTF-8 encoding.
% Consecutive whitespace characters are combined into a single
% space. Whitespace after a macro such as \copyright, \backslash, or
% \sep is ignored. Blank lines are not permitted. Moreover, the
% following markup can be used:
%
% '\ ' - a literal space (for example after a macro)
% \% - a literal '%'
% \{ - a literal '{'
% \} - a literal '}'
% \backslash - a literal '\'
% \copyright - the (c) copyright symbol
%
% The macro \sep is only permitted within \Author, \Keywords, and
% \Org. It is used to separate multiple authors, keywords, etc.
%
% List of supported metadata fields
% =================================
%
% Here is a complete list of user-definable metadata fields currently
% supported, and their meanings. More may be added in the future.
%
% General information:
%
% \Author - the document's human author. Separate multiple
% authors with \sep.
% \Title - the document's title.
% \Keywords - list of keywords, separated with \sep.
% \Subject - the abstract.
% \Org - publishers.
%
% Copyright information:
%
% \Copyright - a copyright statement.
% \CopyrightURL - location of a web page describing the owner
% and/or rights statement for this document.
% \Copyrighted - 'True' if the document is copyrighted, and
% 'False' if it isn't. This is automatically set
% to 'True' if either \Copyright or \CopyrightURL
% is specified, but can be overridden. For
% example, if the copyright statement is "Public
% Domain", this should be set to 'False'.
%
% Publication information:
%
% \PublicationType - The type of publication. If defined, must be
% one of book, catalog, feed, journal, magazine,
% manual, newsletter, pamphlet. This is
% automatically set to "journal" if \Journaltitle
% is specified, but can be overridden.
% \Journaltitle - The title of the journal in which the document
% was published.
% \Journalnumber - The ISSN for the publication in which the
% document was published.
% \Volume - Journal volume.
% \Issue - Journal issue/number.
% \Firstpage - First page number of the published version of
% the document.
% \Lastpage - Last page number of the published version of
% the document.
% \Doi - Digital Object Identifier (DOI) for the
% document, without the leading "doi:".
% \CoverDisplayDate - Date on the cover of the journal issue, as a
% human-readable text string.
% \CoverDate - Date on the cover of the journal issue, in a
% format suitable for storing in a database field
% with a 'date' data type.
\Title{Motion R-CNN: Instance-level 3D Motion Estimation with Region-based CNNs}
\Author{Simon Meister}
\Copyright{Copyright \copyright\ 2017 "Simon Meister"}
\Keywords{optical flow\sep
instance segmentation\sep
deep learning}
\Subject{With the advent of deep learning, it has become popular to re-purpose generic deep networks for classical
computer vision problems involving pixel-wise estimation.
Following this trend, many recent end-to-end deep learning approaches to optical flow and scene flow
predict complete, high resolution flow fields with a generic network for dense, pixel-wise prediction,
thereby ignoring the inherent structure of the underlying motion estimation problem and any physical
constraints within the scene.
We introduce a scalable end-to-end deep learning approach for dense motion estimation that respects the
structure of the scene as being composed of distinct objects, thus combining the representation learning
benefits and speed of end-to-end deep networks with a physically plausible scene model inspired by
slanted plane energy-minimization approaches to scene flow.
Building on recent advances in region-based convolutional networks (R-CNNs), we integrate motion
estimation with instance segmentation. Given two consecutive frames from a monocular RGB-D camera,
our resulting end-to-end deep network detects objects with precise per-pixel object masks and estimates
the 3D motion of each detected object between the frames. By additionally estimating a global camera
motion in the same network, we compose a dense optical flow field based on instance-level and global
motion predictions. We train our network on the synthetic Virtual KITTI dataset, which provides ground
truth for all components of our system.}
\setRGBcolorprofile{sRGB_IEC61966-2-1_black_scaled.icc}
{sRGB_IEC61966-2-1_black_scaled}
{sRGB IEC61966 v2.1 with black scaling}
{http://www.color.org}