
Commit 89e4a2d0 authored by Daniel Lukats

adjusted main, includes and glossary

parent af2336af
\newglossaryentry{action}{
name = {action},
description = {chosen by the agent to interact with the environment. Denoted $a$ or $A_t$}
}
\newglossaryentry{advantage}{
name = {advantage},
first = {\emph{advantage}},
plural = {advantages},
firstplural = {\emph{advantages}},
description = {commonly used to denote a single advantage estimation or value of the advantage function}
}
\newglossaryentry{advantage function}{
name = {advantage function},
description = {describes whether an action yields a higher or lower reward than the expected behavior would}
}
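% For reference, a sketch of the standard definition this entry paraphrases
% (the notation is an assumption, not taken from the thesis): with
% state-value function V and action-value function Q,
%   A(s, a) = Q(s, a) - V(s).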
\newglossaryentry{agent}{
name = {agent},
first = {\emph{agent}},
description = {the acting and learning entity}
}
\newglossaryentry{dynamics}{
name = {dynamics},
description = {a probability distribution that determines the environment's behavior}
}
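% Commonly written as a four-argument function in Sutton and Barto's
% notation (an assumption about this thesis' conventions):
%   p(s', r \mid s, a) = \Pr\{S_t = s', R_t = r \mid S_{t-1} = s, A_{t-1} = a\}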
\newglossaryentry{environment}{
name = {environment},
first = {\emph{environment}},
description = {the world surrounding the agent}
}
\newglossaryentry{episode}{
name = {episode},
description = {a sequence of states, actions and rewards that ends in a terminal state}
}
\newglossaryentry{gae-acr}{
name = {GAE},
description = {see \emph{Generalized Advantage Estimation}},
}
\newglossaryentry{gae}{
name = {Generalized Advantage Estimation},
description = {an advanced advantage estimator that allows trading off bias against variance}
}
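% A sketch of the estimator this entry refers to, reusing the notation of
% the surrounding entries; $\gamma$ (discount factor) and $\lambda$
% (bias-variance trade-off) are assumed hyperparameter names:
%   \delta_t = R_{t+1} + \gamma V(S_{t+1}) - V(S_t)
%   \hat{A}_t = \sum_{l=0}^{\infty} (\gamma\lambda)^l \delta_{t+l}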
\newglossaryentry{gradient}{
name = {gradient ascent/descent},
description = {an iterative method for maximizing/minimizing a multidimensional objective function}
}
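% Sketch of a single gradient ascent step on an objective J with parameters
% $\theta$ and step size $\alpha$ (descent flips the sign); the symbols are
% illustrative, not the thesis' own:
%   \theta_{k+1} = \theta_k + \alpha \nabla_\theta J(\theta_k)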
\newglossaryentry{horizon}{
name = {horizon},
first = {\emph{horizon}},
description = {the final time step of an episode},
}
\newglossaryentry{hyperparameter}{
name = {hyperparameter},
description = {a configuration parameter that is not learned by the agent}
}
\newglossaryentry{mdp}{
name = {Markov decision process},
first = {\emph{Markov decision process}},
plural = {Markov decision processes},
firstplural = {\emph{Markov decision processes}},
description = {a stochastic process containing states, actions and rewards as well as probability distributions
determining the behavior of the agent and the environment}
}
\newglossaryentry{policy}{
name = {policy},
description = {a probability distribution that determines the agent's behavior}
}
\newglossaryentry{ppo-acr}{
name = {PPO},
first = {Proximal Policy Optimization (PPO)},
description = {see \emph{Proximal Policy Optimization}}
}
\newglossaryentry{ppo}{
name = {Proximal Policy Optimization},
description = {a class of deep reinforcement learning algorithms. The specific version used in this thesis is called
PPO clip},
}
\newglossaryentry{return}{
name = {return},
first = {\emph{return}},
plural = {returns},
firstplural = {\emph{returns}},
description = {a sum of rewards seen after a given time step},
}
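% Sketch of the discounted return from time step t, matching the reward
% notation used above ($\gamma$, the discount factor, is an assumption):
%   G_t = \sum_{k=0}^{\infty} \gamma^k R_{t+k+1}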
\newglossaryentry{reward}{
name = {reward},
description = {observed by the agent following an action. The agent is trained to maximize the observed rewards.
Denoted $r$ or $R_{t+1}$}
}
\newglossaryentry{rollout}{
name = {rollout},
description = {a sequence of states, actions, rewards, action probabilities and values generated from interaction of
the agent with the environment}
}
\newglossaryentry{state}{
name = {state},
first = {\emph{state}},
plural = {states},
firstplural = {\emph{states}},
description = {describes the environment. Denoted $s, s'$ or $S_t$}
}
\newglossaryentry{trajectory}{
name = {trajectory},
first = {\emph{trajectory}},
description = {a sequence of states, actions and rewards generated from interaction of the agent with the
environment},
}
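% Usage sketch for these entries with the glossaries package; the sentence
% is invented for illustration. On first use, \gls prints the emphasized
% `first' form, afterwards the plain name; \glspl/\Glspl select the plural
% and capitalized variants:
%   The \gls{agent} chooses an \gls{action} and observes a \gls{reward};
%   \glspl{advantage} are estimated with \gls{gae-acr}.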
@@ -2,21 +2,19 @@
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{lmodern}
\usepackage{tikz}
\usepackage{pgfplots}
\pgfplotsset{compat=newest}
% \usepackage[headsepline]{scrlayer-scrpage}
% \clearpairofpagestyles
% FIGURES
\usepackage{graphicx}
\usepackage{rotating}
\usepackage{longtable}
% TODO
\usepackage{todonotes}
% MATHS
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{bm}
@@ -50,10 +48,11 @@
\input{glossary}
\makeglossaries
% FORMATTING
\usepackage[onehalfspacing]{setspace}
\usepackage{geometry}
\usepackage{layouts}
\usepackage{etoolbox}
\AtBeginEnvironment{thebibliography}{\interlinepenalty=1000}
\usepackage{csquotes}
%\usepackage[dvipsnames]{xcolor}
%\newenvironment{longlisting}{\captionsetup{type=listing}}{}
@@ -15,10 +15,10 @@
\hspace{0pt}
\setcounter{page}{0}
\setcounter{figure}{0}
\clearpage
\todo[inline]{declaration of authorship (eidesstattliche Erklärung)}
\cleardoublepage
\phantomsection
\pdfbookmark[1]{Table of contents}{toc}
\tableofcontents
@@ -49,16 +49,19 @@
\cleardoublepage
% \bibliography{bibliography}
\section*{List of Mathematical Symbols and Definitions}
\addcontentsline{toc}{section}{List of Mathematical Symbols and Definitions}
\label{sec:maths_index}
\input{maths_index.tex}
\cleardoublepage
%\glsaddall
\printglossaries
\cleardoublepage
\bibliographystyle{apacite}
\bibliography{bibliography}
\clearpage
\input{oath.tex}
\end{document}
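% A typical build sequence for a document using \makeglossaries and an
% apacite bibliography (a sketch; latexmk can automate these steps, and the
% exact commands depend on the local TeX installation):
%   pdflatex main
%   bibtex main
%   makeglossaries main
%   pdflatex main
%   pdflatex main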