diff --git a/.DS_Store b/.DS_Store
index 19ae14f5e69d2bbbbdb52b8adfc3ef5b1fa26104..eb21c14837cde40829ae51d1b1ab7ef9341a5019 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/.env b/.env
new file mode 100644
index 0000000000000000000000000000000000000000..2164f16ec8b25eeb881ee54f71e2e49476347efc
--- /dev/null
+++ b/.env
@@ -0,0 +1,4 @@
+DB_HOST=wi-sql.fh-muenster.de
+DB_PORT=5432
+DB_USER=digibim
+DB_PASSWORD='Di&i$IM0815'
diff --git a/04_data_analysis.qmd b/04_data_analysis.qmd
index 199c1ca56c8dffbf9b7d2e8ef9ba77835dbe555b..8dea72a7b39506d3b325cd4c2c7a428757826380 100644
--- a/04_data_analysis.qmd
+++ b/04_data_analysis.qmd
@@ -6,6 +6,50 @@ number-offset: [5,0]
 bibliography: references.bib
 ---
 
+```{python}
+#| eval: true
+#| echo: false
+#| output: false
+
+import os
+from dotenv import load_dotenv
+import sqlalchemy
+import pandas as pd
+from urllib.parse import quote_plus
+
+load_dotenv(".env")
+
+class Database:
+    def __init__(self, db_name):
+        self.conn = db_connect(db_name)
+
+    def __getattr__(self, table_name):
+        return pd.read_sql_table(table_name, self.conn)
+
+    def list_tables(self):
+        inspector = sqlalchemy.inspect(self.conn)
+        table_names = inspector.get_table_names()
+        return table_names
+
+def db_connect(db_name):
+    hostname=os.getenv("DB_HOST")
+    user=os.getenv("DB_USER")
+    password=quote_plus(os.getenv("DB_PASSWORD"))
+    conn = sqlalchemy.create_engine(f'postgresql+psycopg2://{user}:{password}@{hostname}/{db_name}')
+    return conn
+
+def get_table(db_name, table_name):
+    conn = db_connect(db_name)
+    dat = pd.read_sql_table(table_name, conn)
+    return dat
+
+def get_all_tables(db_name):
+    db_obj = Database(db_name)
+    return db_obj
+
+
+```
+
 
 
 ## Where are we? {.unnumbered}
@@ -258,66 +302,633 @@ df %>% kbl(format = "html", escape = FALSE, col.names = c("","","Explanation","E
 ```
 
 
+# Exploratory Data Analysis (EDA) {background-color="#0014a0"}
+
+::: footer
+:::
+
+## Introduction to EDA
+
+:::: {.columns}
+
+::: {.column width="47.5%"}
+#### EDA is an iterative process: 
+
+1. Generate questions about your data.
+2. Search for answers by visualising, transforming, and modelling your data.
+3. Use what you learn to refine your questions and/or generate new questions.
+
+:::
+
+::: {.column width="5%"}
+
+:::
+
+::: {.column width="47.5%"}
+#### EDA is a state of mind:
+
+- EDA is not a formal process with a strict set of rules
+- During the initial phases of EDA you should feel free to investigate every idea that occurs to you
+- Some of these ideas will pan out, and some will be dead ends
+- As your exploration continues, you will home in on a few particularly productive areas that you’ll eventually write up and communicate to others
+:::
+::::
+
+
+
+## Definitions
 
-# Key figures {background-color="#0014a0"}
+- A **variable** is a quantity, quality, or property that you can measure.
+
+- A **value** is the state of a variable when you measure it. The value of a variable may change from measurement to measurement.
+
+- An **observation** is a set of measurements made under similar conditions (you usually make all of the measurements in an observation at the same time and on the same object). An observation will contain several values, each associated with a different variable. I’ll sometimes refer to an observation as a data point.
+
+- **Tabular data** is a set of values, each associated with a variable and an observation. Tabular data is tidy if each value is placed in its own “cell”, each variable in its own column, and each observation in its own row.
+
+
+# Tidy data {background-color="#0014a0"}
 
 ::: footer
 :::
 
-## Averages
+## Tidy data
 
-## Variation
+The starting point for any data analysis should be a tidy dataset. You can represent the same underlying data in multiple ways. There are three interrelated rules which make a dataset tidy:
 
-## Ratios
+1. Each variable must have its own column.
+2. Each observation must have its own row.
+3. Each value must have its own cell.
 
-## Dependencies/relationships
+![Following three rules makes a dataset tidy: variables are in columns, observations are in rows, and values are in cells. (*Source:* @Wickham2017R)](https://bookdown.org/joone/ComputationalMethods/img/7/tidy.png){#fig-tidy}
 
-## Correlation
 
-## Non-linear relationships
 
-## Contingency
+## Making data tidy
 
-## Difference-in-Differences
+:::: {.columns}
 
-## Causality
+::: {.column width="47.5%"}
 
-# Best practices {background-color="#0014a0"}
+![Pivoting into a long tidy form. (*Source:* @Wickham2017R)](https://d33wubrfki0l68.cloudfront.net/3aea19108d39606bbe49981acda07696c0c7fcd8/2de65/images/tidy-9.png){#fig-tidy1}
 
+:::
 
+::: {.column width="5%"}
 
-## Feature Engineering
+:::
 
+::: {.column width="47.5%"}
 
+![Pivoting into a wide tidy form. (*Source:* @Wickham2017R)](https://d33wubrfki0l68.cloudfront.net/8350f0dda414629b9d6c354f87acf5c5f722be43/bcb84/images/tidy-8.png){#fig-tidy2}
 
-## Data preparation
+:::
+::::
 
-![Visualization of the data preparation tasks](img/featureengineering2.png){#fig-dataprep}
 
-![Visualization of feature engineering tasks](img/featureengineering.png){#fig-featureengineering}
+## Describe tables with pandas
+
+:::: {.columns}
 
+::: {.column width="37.5%"}
 
+- In order to get an overview of a table, you can use the `describe` method
+- By default, `describe` computes 8 summary figures for numeric columns (count, mean, standard deviation, min, max and the three quartiles)
+- The argument `include='all'` adds an overview on categorical variables
+
+
+:::
+
+::: {.column width="5%"}
+
+:::
+
+::: {.column width="57.5%"}
+
+#### Example: DVD Rental
+
+```{python}
+#| echo: true
+#| cache: true
+
+# Import tables
+import pandas as pd
+dvdrental = get_all_tables("dvdrental")
+dvdrental.list_tables()
+```
+
+
+```{python}
+#| echo: true
+#| cache: true
+
+# Describe customer table
+dvdrental.customer.describe(include='all')
+```
+
+:::
+::::
+
+
+## Merge/join tables with pandas
+
+
+- To join two tables, the `merge` method from Python's pandas library can be used:
+- **Syntax & Parameters:**
+  - `pd.merge(left, right, how='inner', on=None, left_on=None, right_on=None, ...)`
+  - `left`: First DataFrame to join.
+  - `right`: Second DataFrame to join.
+  - `how`: Type of merge to perform (e.g., 'inner', 'outer', 'left', 'right').
+  - `on`: Column name(s) to join on. Must be found in both DataFrames.
+  - `left_on`: Column(s) from the left DataFrame to use as keys.
+  - `right_on`: Column(s) from the right DataFrame to use as keys.
+- **Join Types:**
+  - `inner`: Only rows with matching keys in both DataFrames are included.
+  - `outer`: All rows from both DataFrames are included, with NaN for missing keys.
+  - `left`: All rows from the left DataFrame are included, along with matched rows from the right DataFrame.
+  - `right`: All rows from the right DataFrame are included, along with matched rows from the left DataFrame.
+
+
+## Example: Merge/join tables with pandas 
+
+```{python}
+#| echo: true
+#| cache: true
+
+# Merge customer, rental and payment tables
+df = (
+dvdrental.customer.merge(
+  dvdrental.rental,
+  on = ['customer_id']
+).merge(
+  dvdrental.payment,
+  on = ['rental_id', 'customer_id']
+)
+)
+
+df.describe(include='all')
+```
+
+
+## Aggregate tables with pandas
+
+
+- In pandas, the `agg` function is used for aggregation operations, allowing for multiple statistics to be calculated simultaneously.
+
+- **Syntax & Parameters:**
+  - `DataFrame.agg(func, axis=0, *args, **kwargs)`
+  - `func`: Function, string function name, list of functions, or dict of column names to functions.
+  - `axis`: Axis along which the function is applied (0 for columns, 1 for rows).
+
+- **Function Input Varieties:**
+  - Single function (e.g., `'sum'`)
+  - List of functions (e.g., `['sum', 'mean']`)
+  - Dictionary mapping columns to functions (e.g., `{'col1': 'sum', 'col2': ['mean', 'std']}`)
+
+- **Behavior with GroupBy:**
+  - Often used in conjunction with `groupby` to perform grouped aggregations.
+
+
+## Example: Aggregate tables with pandas
+
+:::: {.columns}
+
+::: {.column width="47.5%"}
+```{python}
+#| echo: true
+#| cache: true
+
+# Merge rental, film and actor
+df = (
+dvdrental.rental
+.merge(dvdrental.inventory, on = ['inventory_id'])
+.merge(dvdrental.film, on = ['film_id'])
+.merge(dvdrental.film_actor, on = ['film_id'])
+.merge(dvdrental.actor, on = ['actor_id'])
+)
+```
+:::
+
+::: {.column width="5%"}
+
+:::
+
+::: {.column width="47.5%"}
+```{python}
+#| echo: true
+#| cache: true
+
+# Count rentals per actor
+(
+  df
+  .groupby(by = ['actor_id', 'last_name', 'first_name'], 
+           as_index = False)
+  .agg(count_rentals=('rental_id', 'count'))
+  .sort_values(by = 'count_rentals', ascending=False)
+)
+```
+:::
+::::
+
+
+
+
+# Variation {background-color="#0014a0"}
 
 ::: footer
 :::
 
+## Variation
+
+- **Variation Definition:** Tendency of variable values to change with each measurement.
+- **Continuous Variables:** Each measurement likely yields slightly different results.
+- **Constant Quantities:** Even with constants like the speed of light, measurements vary due to errors.
+- **Categorical Variables Variation:**
+  - Different subjects, e.g., various people's eye colors.
+  - Different times, e.g., an electron's energy levels at varied moments.
+- **Pattern of Variation:** Each variable has a unique pattern that can provide insightful information.
+- **Understanding Patterns:** Visualizing a variable's distribution helps understand its variation pattern.
+
+
+## Histograms
+
+:::: {.columns}
+
+::: {.column width="47.5%"}
+
+
+- A **histogram** visualizes the distribution of continuous variables
+- A bar for each bin represents the frequency of observations in this bin
+- Bins can be specified in different variants (e.g. same width vs. varying width)
+
+```{python}
+#| eval: true
+#| echo: true
+#| cache: true
+
+# Compute revenue per customer
+df = (
+dvdrental.customer
+.merge(dvdrental.rental, on = ['customer_id'])
+.merge(dvdrental.payment, on = ['rental_id', 'customer_id'])
+.groupby('customer_id')
+.agg(revenue=('amount', 'sum'))
+)
+```
+:::
+
+::: {.column width="5%"}
+
+:::
+
+::: {.column width="47.5%"}
 
+The `matplotlib` library offers a function `hist` to draw histograms:
 
-## Definition
+```{python}
+#| eval: true
+#| echo: true
+#| label: fig-hist
+#| fig-cap: "Histogram of revenues per customer in the DVD Rental data base"
+#| cache: true
+
+import matplotlib.pyplot as plt
+plt.hist(df['revenue'], bins = 10)
+plt.show()
+```
+:::
+::::
 
-# Exercise {background-color="#0014a0"}
 
-## Exercise 
 
-::: callout-caution
-## Exercise
 
-Please analyze the following use case
 
 
+## Bar plots
+
+:::: {.columns}
+
+::: {.column width="47.5%"}
+
+- In a *bar plot*, the height of a bar represents the frequency of values of a categorical variable
+- Bars can show absolute or relative frequencies
+
+```{python}
+#| eval: true
+#| echo: true
+#| cache: true
+
+# Compute rentals per staff member
+df = (
+dvdrental.customer
+.merge(dvdrental.rental, on = ['customer_id'], 
+       suffixes = ('_customer', '_rental'))
+.merge(dvdrental.staff, on = ['staff_id'],
+      suffixes = ('_customer', '_staff'))
+.groupby(['staff_id', 'first_name_staff', 
+          'last_name_staff', 'email_staff'], 
+          as_index=False)
+.agg(rentals_count=('rental_id', 'count'))
+)
+```
+:::
+
+::: {.column width="5%"}
 
 :::
 
+::: {.column width="47.5%"}
+
+The `matplotlib` library offers a function `bar` to draw bar plots:
+```{python}
+#| eval: true
+#| echo: true
+#| label: fig-bar
+#| fig-cap: "Barplot of count of rentals per staff member in the DVD Rental data base"
+#| cache: true
+
+plt.bar(df['first_name_staff'], df['rentals_count'])
+plt.show()
+```
+:::
+::::
+
+
+
+## Analyzing variation/distributions
+
+
+:::: {.columns}
+
+::: {.column width="47.5%"}
+#### Questions to ask:
+
+1. Which values are the most common? Why?
+2. Which values are rare? Why? Does that match your expectations?
+3. Can you see any unusual patterns? What might explain them?
+
+:::
+
+::: {.column width="5%"}
+
+:::
+
+::: {.column width="47.5%"}
+#### KPIs describing the distribution:
+
+- Frequencies (absolute, relative)
+- Average
+- Standard Deviation, Variance, Median Absolute Deviation
+- Skewness, Kurtosis
+- Outliers
+:::
+::::
+
+
+
+
+
+# Covariation {background-color="#0014a0"}
+
+## Covariation
+
+- **Variation:** describes the behavior within a variable
+- **Covariation:** describes the behavior between variables
+- Covariation is the tendency for the values of two or more variables to vary together in a related way
+- The best way to spot covariation is to visualise the relationship between two or more variables
+- How you do that should again depend on the **type** of variables involved.
+
+## Covariation of a categorical and a continuous variable (1/2)
+
+:::: {.columns}
+
+::: {.column width="47.5%"}
+
+- A *bar plot* can be used to compare aggregated values of a numeric variable across the categories of another variable
+- Aggregations can be counts, sums, ...
+
+```{python}
+#| eval: true
+#| echo: true
+#| cache: true
+
+# Compute revenue per staff member
+df = (
+dvdrental.customer
+.merge(dvdrental.rental, on = ['customer_id'])
+.merge(dvdrental.payment, on = ['customer_id','rental_id', 'staff_id'])
+.merge(dvdrental.staff, on = ['staff_id'], suffixes = ('_customer','_staff'))
+.groupby(['staff_id', 'first_name_staff', 
+          'last_name_staff', 'email_staff'], 
+          as_index=False)
+.agg(revenue=('amount', 'sum'))
+)
+```
+:::
+
+::: {.column width="5%"}
+
+:::
+
+::: {.column width="47.5%"}
+
+
+```{python}
+#| eval: true
+#| echo: true
+#| label: fig-covarbars
+#| fig-cap: "Barplot of revenue per staff member in the DVD Rental data base"
+#| cache: true
+
+plt.bar(df['first_name_staff'], df['revenue'])
+plt.show()
+```
+:::
+::::
+
+
+
+## Covariation of a categorical and a continuous variable (2/2)
+
+:::: {.columns}
+
+::: {.column width="47.5%"}
+
+- Using **boxplots**, the entire distribution of a continuous variable can be compared across categories of another variable
+
+```{python}
+#| eval: true
+#| echo: true
+#| cache: true
+
+# Merge tables for revenues
+df = (
+dvdrental.customer
+.merge(dvdrental.rental, on = ['customer_id'], 
+       suffixes = ('_customer', '_rental'))
+.merge(dvdrental.payment, on = ['customer_id', 
+                                'rental_id', 
+                                'staff_id'], 
+       suffixes = ('_customer', '_rental'))
+.merge(dvdrental.staff, on = ['staff_id'],
+      suffixes = ('_customer', '_staff'))    
+)[['first_name_staff', 'amount']]
+```
+:::
+
+::: {.column width="5%"}
+
+:::
+
+::: {.column width="47.5%"}
+
+The `seaborn` library offers a function `sns.boxplot` to easily draw multiple boxplots for comparison:
+```{python}
+#| eval: true
+#| echo: true
+#| label: fig-boxs
+#| fig-cap: "Boxplots of revenues per rental separately by staff members"
+#| cache: true
+
+import seaborn as sns
+sns.boxplot(x = 'first_name_staff', y = 'amount', data = df)
+```
+:::
+::::
+
+## Covariation of two categorical variables
+
+:::: {.columns}
+
+::: {.column width="47.5%"}
+Create a table to identify the number of rentals per film category and country:
+
+```{python}
+df = (
+dvdrental.film
+.merge(dvdrental.inventory, on = ['film_id'])
+.merge(dvdrental.store, on = ['store_id'])
+.merge(dvdrental.rental, on = ['inventory_id'])    
+.merge(dvdrental.film_category, on = ['film_id'])
+.merge(dvdrental.category, on = ['category_id'])
+.merge(dvdrental.address, on = ['address_id'])
+.merge(dvdrental.city, on = ['city_id'])
+.merge(dvdrental.country, on = ['country_id'])
+.groupby(['country', 'name'], 
+          as_index=False)
+.agg(count = ('rental_id', 'count'))
+)
+df
+```
+:::
+
+::: {.column width="5%"}
+
+:::
+
+::: {.column width="47.5%"}
+
+The `seaborn` library offers a function `heatmap` to create heatmaps:
+```{python}
+#| eval: true
+#| echo: true
+#| label: fig-heatmap
+#| fig-cap: "Heatmap for rentals across film categories and countries"
+#| cache: true
+
+df_pivot = df.pivot(index="name", columns="country", values="count")
+sns.heatmap(df_pivot)
+```
+:::
+::::
+
+
+## Covariation of two continuous variables
+
+
+:::: {.columns}
+
+::: {.column width="47.5%"}
+Create a table with duration and amount paid
+
+```{python}
+df = (
+dvdrental.rental
+.merge(dvdrental.payment, on = ['rental_id', 'customer_id', 'staff_id'])
+)
+df['duration'] = (dvdrental.rental.return_date - dvdrental.rental.rental_date).dt.total_seconds()/60/60/24
+df[['rental_id', 'duration', 'amount']]
+
+```
+:::
+
+::: {.column width="5%"}
+
+:::
+
+::: {.column width="47.5%"}
+
+The `pandas` library offers a method `plot.scatter` to create scatterplots:
+```{python}
+#| eval: true
+#| echo: true
+#| label: fig-scatter
+#| fig-cap: "Scatterplot for rental duration and amount paid"
+#| cache: true
+
+df.plot.scatter(x = 'duration', y = 'amount')
+
+```
+:::
+::::
+
+
+## Analyzing covariation/relationships/dependencies
+
+
+:::: {.columns}
+
+::: {.column width="47.5%"}
+#### Questions to ask:
+
+1. Which variables show common patterns? Are they reasonable/explainable?
+2. What form does the relationship have: is it linear, non-linear?
+
+
+:::
+
+::: {.column width="5%"}
+
+:::
+
+::: {.column width="47.5%"}
+#### KPIs describing the relationship:
+
+- Correlation (Pearson, Spearman, Kendall)
+- Contingency ($\chi^2$, Cramér's $V$)
+- ...
+:::
+::::
+
+
+
+
+
+## Looking back: Data preparation
+
+![Visualization of the data preparation tasks](img/featureengineering2.png){#fig-dataprep}
+
+## Feature engineering
+
+![Visualization of feature engineering tasks](img/featureengineering.png){#fig-featureengineering}
+
+
+
+::: footer
+:::
+
+
+
 
 # References {.unnumbered .scrollable}
 
diff --git a/04_data_analysis_cache/revealjs/__packages b/04_data_analysis_cache/revealjs/__packages
new file mode 100644
index 0000000000000000000000000000000000000000..630f1cc0301a4e19649bf7b6aeb20c7e6668f191
--- /dev/null
+++ b/04_data_analysis_cache/revealjs/__packages
@@ -0,0 +1,3 @@
+kableExtra
+tibble
+magrittr
diff --git a/04_data_analysis_cache/revealjs/fig-bar_fc1ff52eda35eb054746db6a36567957.RData b/04_data_analysis_cache/revealjs/fig-bar_fc1ff52eda35eb054746db6a36567957.RData
new file mode 100644
index 0000000000000000000000000000000000000000..689b29e9b68ef3c6e36d1792daf7857d51c8ccb9
Binary files /dev/null and b/04_data_analysis_cache/revealjs/fig-bar_fc1ff52eda35eb054746db6a36567957.RData differ
diff --git a/04_data_analysis_cache/revealjs/fig-bar_fc1ff52eda35eb054746db6a36567957.rdb b/04_data_analysis_cache/revealjs/fig-bar_fc1ff52eda35eb054746db6a36567957.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/04_data_analysis_cache/revealjs/fig-bar_fc1ff52eda35eb054746db6a36567957.rdx b/04_data_analysis_cache/revealjs/fig-bar_fc1ff52eda35eb054746db6a36567957.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..6fb2189cdb34fa855ea59f3a45e4361c34e897f5
Binary files /dev/null and b/04_data_analysis_cache/revealjs/fig-bar_fc1ff52eda35eb054746db6a36567957.rdx differ
diff --git a/04_data_analysis_cache/revealjs/fig-boxs_3f8cf81cf2682465091dca4869905d5b.RData b/04_data_analysis_cache/revealjs/fig-boxs_3f8cf81cf2682465091dca4869905d5b.RData
new file mode 100644
index 0000000000000000000000000000000000000000..a22f782eec9f213835e94486fc796d1d6100db7b
Binary files /dev/null and b/04_data_analysis_cache/revealjs/fig-boxs_3f8cf81cf2682465091dca4869905d5b.RData differ
diff --git a/04_data_analysis_cache/revealjs/fig-boxs_3f8cf81cf2682465091dca4869905d5b.rdb b/04_data_analysis_cache/revealjs/fig-boxs_3f8cf81cf2682465091dca4869905d5b.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/04_data_analysis_cache/revealjs/fig-boxs_3f8cf81cf2682465091dca4869905d5b.rdx b/04_data_analysis_cache/revealjs/fig-boxs_3f8cf81cf2682465091dca4869905d5b.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..6fb2189cdb34fa855ea59f3a45e4361c34e897f5
Binary files /dev/null and b/04_data_analysis_cache/revealjs/fig-boxs_3f8cf81cf2682465091dca4869905d5b.rdx differ
diff --git a/04_data_analysis_cache/revealjs/fig-hist_71caef5da26f2f509bcda560d69b149f.RData b/04_data_analysis_cache/revealjs/fig-hist_71caef5da26f2f509bcda560d69b149f.RData
new file mode 100644
index 0000000000000000000000000000000000000000..3a1f1d4ccb5f96919e43605763e1340f699f1f14
Binary files /dev/null and b/04_data_analysis_cache/revealjs/fig-hist_71caef5da26f2f509bcda560d69b149f.RData differ
diff --git a/04_data_analysis_cache/revealjs/fig-hist_71caef5da26f2f509bcda560d69b149f.rdb b/04_data_analysis_cache/revealjs/fig-hist_71caef5da26f2f509bcda560d69b149f.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/04_data_analysis_cache/revealjs/fig-hist_71caef5da26f2f509bcda560d69b149f.rdx b/04_data_analysis_cache/revealjs/fig-hist_71caef5da26f2f509bcda560d69b149f.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..6fb2189cdb34fa855ea59f3a45e4361c34e897f5
Binary files /dev/null and b/04_data_analysis_cache/revealjs/fig-hist_71caef5da26f2f509bcda560d69b149f.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-10_479bbfcec8812b71d6fc6e530e31e965.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-10_479bbfcec8812b71d6fc6e530e31e965.RData
new file mode 100644
index 0000000000000000000000000000000000000000..95e94496fc5c5e7a31a6d5cb1ca42b2460db195c
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-10_479bbfcec8812b71d6fc6e530e31e965.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-10_479bbfcec8812b71d6fc6e530e31e965.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-10_479bbfcec8812b71d6fc6e530e31e965.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-10_479bbfcec8812b71d6fc6e530e31e965.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-10_479bbfcec8812b71d6fc6e530e31e965.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..6fb2189cdb34fa855ea59f3a45e4361c34e897f5
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-10_479bbfcec8812b71d6fc6e530e31e965.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-11_89441b0133c42598b635a86797c59c6a.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-11_89441b0133c42598b635a86797c59c6a.RData
new file mode 100644
index 0000000000000000000000000000000000000000..1d000f26a385e5c98b57f31e50085f5d38d35007
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-11_89441b0133c42598b635a86797c59c6a.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-11_89441b0133c42598b635a86797c59c6a.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-11_89441b0133c42598b635a86797c59c6a.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-11_89441b0133c42598b635a86797c59c6a.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-11_89441b0133c42598b635a86797c59c6a.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..6fb2189cdb34fa855ea59f3a45e4361c34e897f5
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-11_89441b0133c42598b635a86797c59c6a.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-12_687914cfbaeeeeec53a7c7fefae3bd0b.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-12_687914cfbaeeeeec53a7c7fefae3bd0b.RData
new file mode 100644
index 0000000000000000000000000000000000000000..093df72a3629fe8080be43acd89c1d5815c72d00
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-12_687914cfbaeeeeec53a7c7fefae3bd0b.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-12_687914cfbaeeeeec53a7c7fefae3bd0b.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-12_687914cfbaeeeeec53a7c7fefae3bd0b.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-12_687914cfbaeeeeec53a7c7fefae3bd0b.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-12_687914cfbaeeeeec53a7c7fefae3bd0b.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..6fb2189cdb34fa855ea59f3a45e4361c34e897f5
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-12_687914cfbaeeeeec53a7c7fefae3bd0b.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-14_35e622235d58d5353508a610142fbead.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-14_35e622235d58d5353508a610142fbead.RData
new file mode 100644
index 0000000000000000000000000000000000000000..7e24d41ca918f1fdd9fdeaa601c2841964a2efb8
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-14_35e622235d58d5353508a610142fbead.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-14_35e622235d58d5353508a610142fbead.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-14_35e622235d58d5353508a610142fbead.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-14_35e622235d58d5353508a610142fbead.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-14_35e622235d58d5353508a610142fbead.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..6fb2189cdb34fa855ea59f3a45e4361c34e897f5
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-14_35e622235d58d5353508a610142fbead.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-16_4931e7a3e18bdabe5cfeb1ed50042ee4.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-16_4931e7a3e18bdabe5cfeb1ed50042ee4.RData
new file mode 100644
index 0000000000000000000000000000000000000000..596e8b2a0294134538b03151640a0cb3fc57f2a6
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-16_4931e7a3e18bdabe5cfeb1ed50042ee4.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-16_4931e7a3e18bdabe5cfeb1ed50042ee4.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-16_4931e7a3e18bdabe5cfeb1ed50042ee4.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-16_4931e7a3e18bdabe5cfeb1ed50042ee4.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-16_4931e7a3e18bdabe5cfeb1ed50042ee4.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..6fb2189cdb34fa855ea59f3a45e4361c34e897f5
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-16_4931e7a3e18bdabe5cfeb1ed50042ee4.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-18_80ed6c4419c76e7380fd983d0a842026.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-18_80ed6c4419c76e7380fd983d0a842026.RData
new file mode 100644
index 0000000000000000000000000000000000000000..1e09bd36c79ef6990d3e5c08f35d3be0225fc26d
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-18_80ed6c4419c76e7380fd983d0a842026.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-18_80ed6c4419c76e7380fd983d0a842026.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-18_80ed6c4419c76e7380fd983d0a842026.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-18_80ed6c4419c76e7380fd983d0a842026.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-18_80ed6c4419c76e7380fd983d0a842026.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..6fb2189cdb34fa855ea59f3a45e4361c34e897f5
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-18_80ed6c4419c76e7380fd983d0a842026.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-19_7fead5d28eb0b764e98a71e5b5c9ee90.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-19_7fead5d28eb0b764e98a71e5b5c9ee90.RData
new file mode 100644
index 0000000000000000000000000000000000000000..0bd08a713151f2f867a593cf0a3bd85e4f4bf141
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-19_7fead5d28eb0b764e98a71e5b5c9ee90.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-19_7fead5d28eb0b764e98a71e5b5c9ee90.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-19_7fead5d28eb0b764e98a71e5b5c9ee90.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-19_7fead5d28eb0b764e98a71e5b5c9ee90.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-19_7fead5d28eb0b764e98a71e5b5c9ee90.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..6fb2189cdb34fa855ea59f3a45e4361c34e897f5
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-19_7fead5d28eb0b764e98a71e5b5c9ee90.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-1_35d11017ccc01a8441741046e75685cc.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-1_35d11017ccc01a8441741046e75685cc.RData
new file mode 100644
index 0000000000000000000000000000000000000000..4800e39c996e300747f43f322a8ec73037833273
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-1_35d11017ccc01a8441741046e75685cc.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-1_35d11017ccc01a8441741046e75685cc.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-1_35d11017ccc01a8441741046e75685cc.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-1_35d11017ccc01a8441741046e75685cc.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-1_35d11017ccc01a8441741046e75685cc.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..6fb2189cdb34fa855ea59f3a45e4361c34e897f5
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-1_35d11017ccc01a8441741046e75685cc.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-20_314d5ec78f5adc406f836667b39eb11b.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-20_314d5ec78f5adc406f836667b39eb11b.RData
new file mode 100644
index 0000000000000000000000000000000000000000..1467ab02f95d4720a471e74ba26d31826f9d715d
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-20_314d5ec78f5adc406f836667b39eb11b.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-20_314d5ec78f5adc406f836667b39eb11b.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-20_314d5ec78f5adc406f836667b39eb11b.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-20_314d5ec78f5adc406f836667b39eb11b.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-20_314d5ec78f5adc406f836667b39eb11b.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..6fb2189cdb34fa855ea59f3a45e4361c34e897f5
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-20_314d5ec78f5adc406f836667b39eb11b.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-2_6b7bb75b52acd1927a1783d2a3030a3b.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-2_6b7bb75b52acd1927a1783d2a3030a3b.RData
new file mode 100644
index 0000000000000000000000000000000000000000..26b0031818ce2ad4832e70fdc69d6c6e124bc005
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-2_6b7bb75b52acd1927a1783d2a3030a3b.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-2_6b7bb75b52acd1927a1783d2a3030a3b.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-2_6b7bb75b52acd1927a1783d2a3030a3b.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..a24957bca9c14578ac88e89a5c5ce011f3cb015b
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-2_6b7bb75b52acd1927a1783d2a3030a3b.rdb differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-2_6b7bb75b52acd1927a1783d2a3030a3b.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-2_6b7bb75b52acd1927a1783d2a3030a3b.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..7a69961defc6abae6d82232aecef5060c0974c18
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-2_6b7bb75b52acd1927a1783d2a3030a3b.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-3_6117d9d1ce684bfd1f7d17dc35b7188c.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-3_6117d9d1ce684bfd1f7d17dc35b7188c.RData
new file mode 100644
index 0000000000000000000000000000000000000000..98ddd9c208bc13e30079b6a60bf6e8d4f48c2de1
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-3_6117d9d1ce684bfd1f7d17dc35b7188c.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-3_6117d9d1ce684bfd1f7d17dc35b7188c.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-3_6117d9d1ce684bfd1f7d17dc35b7188c.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..e11fc7487c71ecddfbcf62834ffa3cd98da2cb59
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-3_6117d9d1ce684bfd1f7d17dc35b7188c.rdb differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-3_6117d9d1ce684bfd1f7d17dc35b7188c.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-3_6117d9d1ce684bfd1f7d17dc35b7188c.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..5fc456d438f75329bfbacd760e0378b667d359e2
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-3_6117d9d1ce684bfd1f7d17dc35b7188c.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-4_e098d7dbb269d0b3dd75b7171646472c.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-4_e098d7dbb269d0b3dd75b7171646472c.RData
new file mode 100644
index 0000000000000000000000000000000000000000..7eaca0984ed3ab7c70aee2efd0fbe058244706b3
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-4_e098d7dbb269d0b3dd75b7171646472c.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-4_e098d7dbb269d0b3dd75b7171646472c.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-4_e098d7dbb269d0b3dd75b7171646472c.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..5529c92159b0c080602e230d12e0d88f3cc851d6
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-4_e098d7dbb269d0b3dd75b7171646472c.rdb differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-4_e098d7dbb269d0b3dd75b7171646472c.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-4_e098d7dbb269d0b3dd75b7171646472c.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..f79a758f8ebcaad073f4db70e73fef86bb3a604e
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-4_e098d7dbb269d0b3dd75b7171646472c.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-5_2d0a40b600e80a7cafe8427cf2d9c1ea.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-5_2d0a40b600e80a7cafe8427cf2d9c1ea.RData
new file mode 100644
index 0000000000000000000000000000000000000000..7747d9041e536d93d3609f23b381837f5f07d41d
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-5_2d0a40b600e80a7cafe8427cf2d9c1ea.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-5_2d0a40b600e80a7cafe8427cf2d9c1ea.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-5_2d0a40b600e80a7cafe8427cf2d9c1ea.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..5529c92159b0c080602e230d12e0d88f3cc851d6
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-5_2d0a40b600e80a7cafe8427cf2d9c1ea.rdb differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-5_2d0a40b600e80a7cafe8427cf2d9c1ea.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-5_2d0a40b600e80a7cafe8427cf2d9c1ea.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..cfa5c269e5b6317cc9d1ac12a378c742b7a6fa2a
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-5_2d0a40b600e80a7cafe8427cf2d9c1ea.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-6_eb907bfe5d5b24818e0038af41345d93.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-6_eb907bfe5d5b24818e0038af41345d93.RData
new file mode 100644
index 0000000000000000000000000000000000000000..e7195ce05772e9948d1ea164b5fe905a80b2dcd1
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-6_eb907bfe5d5b24818e0038af41345d93.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-6_eb907bfe5d5b24818e0038af41345d93.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-6_eb907bfe5d5b24818e0038af41345d93.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..40511247a8e749ceab87ad04ceba557aa83f6376
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-6_eb907bfe5d5b24818e0038af41345d93.rdb differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-6_eb907bfe5d5b24818e0038af41345d93.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-6_eb907bfe5d5b24818e0038af41345d93.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..35751a4d3fcd5be491597caae077a64ee8637e4e
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-6_eb907bfe5d5b24818e0038af41345d93.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-7_4157d48af9e03dcccf842a5b8b44284a.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-7_4157d48af9e03dcccf842a5b8b44284a.RData
new file mode 100644
index 0000000000000000000000000000000000000000..f41ad6e7281ef28b61235143bdec838c28657170
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-7_4157d48af9e03dcccf842a5b8b44284a.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-7_4157d48af9e03dcccf842a5b8b44284a.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-7_4157d48af9e03dcccf842a5b8b44284a.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-7_4157d48af9e03dcccf842a5b8b44284a.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-7_4157d48af9e03dcccf842a5b8b44284a.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..6fb2189cdb34fa855ea59f3a45e4361c34e897f5
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-7_4157d48af9e03dcccf842a5b8b44284a.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-8_c3b521478ddf338ea9816a5de15b5cd8.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-8_c3b521478ddf338ea9816a5de15b5cd8.RData
new file mode 100644
index 0000000000000000000000000000000000000000..dabebf36884913c278042d40a2fd21750793aa0c
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-8_c3b521478ddf338ea9816a5de15b5cd8.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-8_c3b521478ddf338ea9816a5de15b5cd8.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-8_c3b521478ddf338ea9816a5de15b5cd8.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-8_c3b521478ddf338ea9816a5de15b5cd8.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-8_c3b521478ddf338ea9816a5de15b5cd8.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..6fb2189cdb34fa855ea59f3a45e4361c34e897f5
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-8_c3b521478ddf338ea9816a5de15b5cd8.rdx differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-9_1e14a1e5fdcac4f57383a7cad0b5ef35.RData b/04_data_analysis_cache/revealjs/unnamed-chunk-9_1e14a1e5fdcac4f57383a7cad0b5ef35.RData
new file mode 100644
index 0000000000000000000000000000000000000000..f306f887f8680baa14d3f763b7a7c8c4edd4b3ae
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-9_1e14a1e5fdcac4f57383a7cad0b5ef35.RData differ
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-9_1e14a1e5fdcac4f57383a7cad0b5ef35.rdb b/04_data_analysis_cache/revealjs/unnamed-chunk-9_1e14a1e5fdcac4f57383a7cad0b5ef35.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/04_data_analysis_cache/revealjs/unnamed-chunk-9_1e14a1e5fdcac4f57383a7cad0b5ef35.rdx b/04_data_analysis_cache/revealjs/unnamed-chunk-9_1e14a1e5fdcac4f57383a7cad0b5ef35.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..6fb2189cdb34fa855ea59f3a45e4361c34e897f5
Binary files /dev/null and b/04_data_analysis_cache/revealjs/unnamed-chunk-9_1e14a1e5fdcac4f57383a7cad0b5ef35.rdx differ
diff --git a/04_data_analysis_files/figure-revealjs/fig-bar-3.png b/04_data_analysis_files/figure-revealjs/fig-bar-3.png
new file mode 100644
index 0000000000000000000000000000000000000000..74f428ba1b3983c4854d3115a16ddb773c1d92b2
Binary files /dev/null and b/04_data_analysis_files/figure-revealjs/fig-bar-3.png differ
diff --git a/04_data_analysis_files/figure-revealjs/fig-bar-5.png b/04_data_analysis_files/figure-revealjs/fig-bar-5.png
new file mode 100644
index 0000000000000000000000000000000000000000..74f428ba1b3983c4854d3115a16ddb773c1d92b2
Binary files /dev/null and b/04_data_analysis_files/figure-revealjs/fig-bar-5.png differ
diff --git a/04_data_analysis_files/figure-revealjs/fig-boxs-3.png b/04_data_analysis_files/figure-revealjs/fig-boxs-3.png
new file mode 100644
index 0000000000000000000000000000000000000000..4b8fde2d151be5b30e4d2803a304ba0def0d2914
Binary files /dev/null and b/04_data_analysis_files/figure-revealjs/fig-boxs-3.png differ
diff --git a/04_data_analysis_files/figure-revealjs/fig-boxs-7.png b/04_data_analysis_files/figure-revealjs/fig-boxs-7.png
new file mode 100644
index 0000000000000000000000000000000000000000..4b8fde2d151be5b30e4d2803a304ba0def0d2914
Binary files /dev/null and b/04_data_analysis_files/figure-revealjs/fig-boxs-7.png differ
diff --git a/04_data_analysis_files/figure-revealjs/fig-covarbars-5.png b/04_data_analysis_files/figure-revealjs/fig-covarbars-5.png
new file mode 100644
index 0000000000000000000000000000000000000000..eaecd87b18e6a6b62d1cac6bb2232b569511b642
Binary files /dev/null and b/04_data_analysis_files/figure-revealjs/fig-covarbars-5.png differ
diff --git a/04_data_analysis_files/figure-revealjs/fig-heatmap-9.png b/04_data_analysis_files/figure-revealjs/fig-heatmap-9.png
new file mode 100644
index 0000000000000000000000000000000000000000..23fbfa8a33aa00811331d35f804a2526905f7aa3
Binary files /dev/null and b/04_data_analysis_files/figure-revealjs/fig-heatmap-9.png differ
diff --git a/04_data_analysis_files/figure-revealjs/fig-hist-1.png b/04_data_analysis_files/figure-revealjs/fig-hist-1.png
new file mode 100644
index 0000000000000000000000000000000000000000..4d396c98a31acdb4bddf2cc161a54648d46feacb
Binary files /dev/null and b/04_data_analysis_files/figure-revealjs/fig-hist-1.png differ
diff --git a/04_data_analysis_files/figure-revealjs/fig-scatter-11.png b/04_data_analysis_files/figure-revealjs/fig-scatter-11.png
new file mode 100644
index 0000000000000000000000000000000000000000..665e4c669e8fe688b34f92199a9e3e5b08d52997
Binary files /dev/null and b/04_data_analysis_files/figure-revealjs/fig-scatter-11.png differ
diff --git a/05_data_fellacies.qmd b/05_data_fellacies.qmd
index fb325a8306976dbb4d7260587d96f77aa64e4746..04c8bb4db94ebec59d9186342228cd3379f74070 100644
--- a/05_data_fellacies.qmd
+++ b/05_data_fellacies.qmd
@@ -1,6 +1,6 @@
 ---
 title: "Data Literacy"
-subtitle: "Chapter 6: Data Storytelling"
+subtitle: "Chapter 5: Data Fallacies"
 author: Prof. Dr. Michael Bücker
 number-offset: [6,0]
 bibliography: references.bib
@@ -11,11 +11,11 @@ bibliography: references.bib
 
 ## Where are we? {.unnumbered}
 
-![](img/data_storytelling.png)
+![](img/data_analysis.png)
 
 
 
-# Data storytelling {background-color="#0014a0"}
+# Data fallacies {background-color="#0014a0"}
 
 ::: footer
 :::
@@ -31,6 +31,56 @@ bibliography: references.bib
 :::
 
 
+## Survivorship bias
+
+:::: {.columns}
+
+::: {.column width="47.5%"}
+
+:::
+
+::: {.column width="5%"}
+
+:::
+
+::: {.column width="47.5%"}
+![Visualization of the survivorship bias (*Source:* [https://www.geckoboard.com/best-practice/statistical-fallacies/](https://www.geckoboard.com/best-practice/statistical-fallacies/))](img/fellacies_survivorshipbias.png){#fig-survivorshipbias}
+
+:::
+::::
+
+
+
+## False causality
+
+:::: {.columns}
+
+::: {.column width="47.5%"}
+[https://www.tylervigen.com/spurious-correlations](https://www.tylervigen.com/spurious-correlations)
+:::
+
+::: {.column width="5%"}
+
+:::
+
+::: {.column width="47.5%"}
+![Visualization of false causality (*Source:* [https://www.geckoboard.com/best-practice/statistical-fallacies/](https://www.geckoboard.com/best-practice/statistical-fallacies/))](img/fellacies_falsecausality.png){#fig-falsecausality}
+
+:::
+::::
+
+
+
+## Exercise 
+
+::: callout-caution
+## Exercise
+
+Please analyze the following use case:
+
+
+
+:::
 
 
 # References {.unnumbered .scrollable}
diff --git a/06_data_storytelling.qmd b/06_data_storytelling.qmd
index 04c8bb4db94ebec59d9186342228cd3379f74070..fb325a8306976dbb4d7260587d96f77aa64e4746 100644
--- a/06_data_storytelling.qmd
+++ b/06_data_storytelling.qmd
@@ -1,6 +1,6 @@
 ---
 title: "Data Literacy"
-subtitle: "Chapter 5: Data Fellacies"
+subtitle: "Chapter 6: Data Storytelling"
 author: Prof. Dr. Michael Bücker
 number-offset: [6,0]
 bibliography: references.bib
@@ -11,11 +11,11 @@ bibliography: references.bib
 
 ## Where are we? {.unnumbered}
 
-![](img/data_analysis.png)
+![](img/data_storytelling.png)
 
 
 
-# Data fellacies {background-color="#0014a0"}
+# Data storytelling {background-color="#0014a0"}
 
 ::: footer
 :::
@@ -31,56 +31,6 @@ bibliography: references.bib
 :::
 
 
-## Survivorship bias
-
-:::: {.columns}
-
-::: {.column width="47.5%"}
-
-:::
-
-::: {.column width="5%"}
-
-:::
-
-::: {.column width="47.5%"}
-![Visualization of the survivorship bias (*Source:* [https://www.geckoboard.com/best-practice/statistical-fallacies/](https://www.geckoboard.com/best-practice/statistical-fallacies/))](img/fellacies_survivorshipbias.png){#fig-survivorshipbias}
-
-:::
-::::
-
-
-
-## False causality
-
-:::: {.columns}
-
-::: {.column width="47.5%"}
-[https://www.tylervigen.com/spurious-correlations](https://www.tylervigen.com/spurious-correlations)
-:::
-
-::: {.column width="5%"}
-
-:::
-
-::: {.column width="47.5%"}
-![Visualization of false causality (*Source:* [https://www.geckoboard.com/best-practice/statistical-fallacies/](https://www.geckoboard.com/best-practice/statistical-fallacies/))](img/fellacies_falsecausality.png){#fig-falsecausality}
-
-:::
-::::
-
-
-
-## Exercise 
-
-::: callout-caution
-## Exercise
-
-Please analyze the following use case
-
-
-
-:::
 
 
 # References {.unnumbered .scrollable}
diff --git a/output/04_data_analysis.html b/output/04_data_analysis.html
index b08f7bebd4f4b615c55056630e4a1866580f8653..f4e3358e8aa30951cc7a99d727f444580fd5fe46 100644
--- a/output/04_data_analysis.html
+++ b/output/04_data_analysis.html
@@ -29,6 +29,71 @@
       margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ 
       vertical-align: middle;
     }
+    /* CSS for syntax highlighting */
+    pre > code.sourceCode { white-space: pre; position: relative; }
+    pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
+    pre > code.sourceCode > span:empty { height: 1.2em; }
+    .sourceCode { overflow: visible; }
+    code.sourceCode > span { color: inherit; text-decoration: inherit; }
+    div.sourceCode { margin: 1em 0; }
+    pre.sourceCode { margin: 0; }
+    @media screen {
+    div.sourceCode { overflow: auto; }
+    }
+    @media print {
+    pre > code.sourceCode { white-space: pre-wrap; }
+    pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
+    }
+    pre.numberSource code
+      { counter-reset: source-line 0; }
+    pre.numberSource code > span
+      { position: relative; left: -4em; counter-increment: source-line; }
+    pre.numberSource code > span > a:first-child::before
+      { content: counter(source-line);
+        position: relative; left: -1em; text-align: right; vertical-align: baseline;
+        border: none; display: inline-block;
+        -webkit-touch-callout: none; -webkit-user-select: none;
+        -khtml-user-select: none; -moz-user-select: none;
+        -ms-user-select: none; user-select: none;
+        padding: 0 4px; width: 4em;
+        color: #aaaaaa;
+      }
+    pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
+    div.sourceCode
+      { color: #003b4f; background-color: #f1f3f5; }
+    @media screen {
+    pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
+    }
+    code span { color: #003b4f; } /* Normal */
+    code span.al { color: #ad0000; } /* Alert */
+    code span.an { color: #5e5e5e; } /* Annotation */
+    code span.at { color: #657422; } /* Attribute */
+    code span.bn { color: #ad0000; } /* BaseN */
+    code span.bu { } /* BuiltIn */
+    code span.cf { color: #003b4f; } /* ControlFlow */
+    code span.ch { color: #20794d; } /* Char */
+    code span.cn { color: #8f5902; } /* Constant */
+    code span.co { color: #5e5e5e; } /* Comment */
+    code span.cv { color: #5e5e5e; font-style: italic; } /* CommentVar */
+    code span.do { color: #5e5e5e; font-style: italic; } /* Documentation */
+    code span.dt { color: #ad0000; } /* DataType */
+    code span.dv { color: #ad0000; } /* DecVal */
+    code span.er { color: #ad0000; } /* Error */
+    code span.ex { } /* Extension */
+    code span.fl { color: #ad0000; } /* Float */
+    code span.fu { color: #4758ab; } /* Function */
+    code span.im { color: #00769e; } /* Import */
+    code span.in { color: #5e5e5e; } /* Information */
+    code span.kw { color: #003b4f; } /* Keyword */
+    code span.op { color: #5e5e5e; } /* Operator */
+    code span.ot { color: #003b4f; } /* Other */
+    code span.pp { color: #ad0000; } /* Preprocessor */
+    code span.sc { color: #5e5e5e; } /* SpecialChar */
+    code span.ss { color: #20794d; } /* SpecialString */
+    code span.st { color: #20794d; } /* String */
+    code span.va { color: #111111; } /* Variable */
+    code span.vs { color: #20794d; } /* VerbatimString */
+    code span.wa { color: #5e5e5e; font-style: italic; } /* Warning */
     /* CSS for citations */
     div.csl-bib-body { }
     div.csl-entry {
@@ -50,6 +115,22 @@
       margin-left: 2em;
     }  </style>
   <link rel="stylesheet" href="04_data_analysis_files/libs/revealjs/dist/theme/quarto.css">
+  <script>window.backupDefine = window.define; window.define = undefined;</script><script src="https://cdn.jsdelivr.net/npm/katex@0.15.1/dist/katex.min.js"></script>
+  <script>document.addEventListener("DOMContentLoaded", function () {
+ var mathElements = document.getElementsByClassName("math");
+ var macros = [];
+ for (var i = 0; i < mathElements.length; i++) {
+  var texText = mathElements[i].firstChild;
+  if (mathElements[i].tagName == "SPAN") {
+   katex.render(texText.data, mathElements[i], {
+    displayMode: mathElements[i].classList.contains('display'),
+    throwOnError: false,
+    macros: macros,
+    fleqn: false
+   });
+}}});
+  </script>
+  <script>window.define = window.backupDefine; window.backupDefine = undefined;</script><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.15.1/dist/katex.min.css">
   <link href="04_data_analysis_files/libs/revealjs/plugin/quarto-line-highlight/line-highlight.css" rel="stylesheet">
   <link href="04_data_analysis_files/libs/revealjs/plugin/reveal-menu/menu.css" rel="stylesheet">
   <link href="04_data_analysis_files/libs/revealjs/plugin/reveal-menu/quarto-menu.css" rel="stylesheet">
@@ -367,9 +448,10 @@ Prof.&nbsp;Dr.&nbsp;Michael Bücker
 <h2 id="toc-title">Table of contents</h2>
 <ul>
 <li><a href="#/analytics-methods" id="/toc-analytics-methods"><span class="header-section-number">5.1</span> Analytics methods</a></li>
-<li><a href="#/key-figures" id="/toc-key-figures"><span class="header-section-number">5.2</span> Key figures</a></li>
-<li><a href="#/best-practices" id="/toc-best-practices"><span class="header-section-number">5.3</span> Best practices</a></li>
-<li><a href="#/exercise" id="/toc-exercise"><span class="header-section-number">5.4</span> Exercise</a></li>
+<li><a href="#/explorative-data-analysis-eda" id="/toc-explorative-data-analysis-eda"><span class="header-section-number">5.2</span> Explorative Data Analysis (EDA)</a></li>
+<li><a href="#/tidy-data" id="/toc-tidy-data"><span class="header-section-number">5.3</span> Tidy data</a></li>
+<li><a href="#/variation" id="/toc-variation"><span class="header-section-number">5.4</span> Variation</a></li>
+<li><a href="#/covaration" id="/toc-covaration"><span class="header-section-number">5.5</span> Covariation</a></li>
 <li><a href="#/references" id="/toc-references">References</a></li>
 </ul>
 </nav>
@@ -821,74 +903,616 @@ Prof.&nbsp;Dr.&nbsp;Michael Bücker
 </div>
 </section></section>
 <section>
-<section id="key-figures" class="title-slide slide level2 center" data-background-color="#0014a0" data-number="5.2">
-<h2><span class="header-section-number">5.2</span> Key figures</h2>
+<section id="explorative-data-analysis-eda" class="title-slide slide level2 center" data-background-color="#0014a0" data-number="5.2">
+<h2><span class="header-section-number">5.2</span> Explorative Data Analysis (EDA)</h2>
 <div class="footer">
 
 </div>
 </section>
-<section id="averages" class="slide level3" data-number="5.2.1">
-<h3><span class="header-section-number">5.2.1</span> Averages</h3>
-</section>
-<section id="variation" class="slide level3" data-number="5.2.2">
-<h3><span class="header-section-number">5.2.2</span> Variation</h3>
+<section id="introduction-to-eda" class="slide level3" data-number="5.2.1">
+<h3><span class="header-section-number">5.2.1</span> Introduction to EDA</h3>
+<div class="columns">
+<div class="column" style="width:47.5%;">
+<h5 id="eda-is-an-iterative-process">EDA is an iterative process:</h5>
+<ol type="1">
+<li>Generate questions about your data.</li>
+<li>Search for answers by visualising, transforming, and modelling your data.</li>
+<li>Use what you learn to refine your questions and/or generate new questions.</li>
+</ol>
+</div><div class="column" style="width:5%;">
+
+</div><div class="column" style="width:47.5%;">
+<h5 id="eda-is-a-state-of-mind">EDA is a state of mind:</h5>
+<ul>
+<li>EDA is not a formal process with a strict set of rules</li>
+<li>During the initial phases of EDA you should feel free to investigate every idea that occurs to you</li>
+<li>Some of these ideas will pan out, and some will be dead ends</li>
+<li>As your exploration continues, you will home in on a few particularly productive areas that you’ll eventually write up and communicate to others</li>
+</ul>
+</div>
+</div>
 </section>
-<section id="ratios" class="slide level3" data-number="5.2.3">
-<h3><span class="header-section-number">5.2.3</span> Ratios</h3>
+<section id="definitions" class="slide level3" data-number="5.2.2">
+<h3><span class="header-section-number">5.2.2</span> Definitions</h3>
+<ul>
+<li><p>A <strong>variable</strong> is a quantity, quality, or property that you can measure.</p></li>
+<li><p>A <strong>value</strong> is the state of a variable when you measure it. The value of a variable may change from measurement to measurement.</p></li>
+<li><p>An <strong>observation</strong> is a set of measurements made under similar conditions (you usually make all of the measurements in an observation at the same time and on the same object). An observation will contain several values, each associated with a different variable. I’ll sometimes refer to an observation as a data point.</p></li>
+<li><p><strong>Tabular data</strong> is a set of values, each associated with a variable and an observation. Tabular data is tidy if each value is placed in its own “cell”, each variable in its own column, and each observation in its own row.</p></li>
+</ul>
+</section></section>
+<section>
+<section id="tidy-data" class="title-slide slide level2 center" data-background-color="#0014a0" data-number="5.3">
+<h2><span class="header-section-number">5.3</span> Tidy data</h2>
+<div class="footer">
+
+</div>
 </section>
-<section id="dependenciesrelationships" class="slide level3" data-number="5.2.4">
-<h3><span class="header-section-number">5.2.4</span> Dependencies/relationships</h3>
+<section id="tidy-data-1" class="slide level3" data-number="5.3.1">
+<h3><span class="header-section-number">5.3.1</span> Tidy data</h3>
+<p>The starting point for any data analysis should be a tidy dataset. You can represent the same underlying data in multiple ways. There are three interrelated rules which make a dataset tidy:</p>
+<ol type="1">
+<li>Each variable must have its own column.</li>
+<li>Each observation must have its own row.</li>
+<li>Each value must have its own cell.</li>
+</ol>
+
+<img data-src="https://bookdown.org/joone/ComputationalMethods/img/7/tidy.png" class="r-stretch quarto-figure-center"><p class="caption">Figure&nbsp;5.1: Following three rules makes a dataset tidy: variables are in columns, observations are in rows, and values are in cells. (<em>Source:</em> <span class="citation" data-cites="Wickham2017R">Wickham and Grolemund (<a href="#/references" role="doc-biblioref" onclick="">2017</a>)</span>)</p></section>
+<section id="making-data-tidy" class="slide level3" data-number="5.3.2">
+<h3><span class="header-section-number">5.3.2</span> Making data tidy</h3>
+<div class="columns">
+<div class="column" style="width:47.5%;">
+<div id="fig-tidy1" class="quarto-figure quarto-figure-center">
+<figure>
+<p><img data-src="https://d33wubrfki0l68.cloudfront.net/3aea19108d39606bbe49981acda07696c0c7fcd8/2de65/images/tidy-9.png"></p>
+<figcaption>Figure&nbsp;5.2: Pivoting into a long tidy form. (<em>Source:</em> <span class="citation" data-cites="Wickham2017R">Wickham and Grolemund (<a href="#/references" role="doc-biblioref" onclick="">2017</a>)</span>)</figcaption>
+</figure>
+</div>
+</div><div class="column" style="width:5%;">
+
+</div><div class="column" style="width:47.5%;">
+<div id="fig-tidy2" class="quarto-figure quarto-figure-center">
+<figure>
+<p><img data-src="https://d33wubrfki0l68.cloudfront.net/8350f0dda414629b9d6c354f87acf5c5f722be43/bcb84/images/tidy-8.png"></p>
+<figcaption>Figure&nbsp;5.3: Pivoting into a wide tidy form. (<em>Source:</em> <span class="citation" data-cites="Wickham2017R">Wickham and Grolemund (<a href="#/references" role="doc-biblioref" onclick="">2017</a>)</span>)</figcaption>
+</figure>
+</div>
+</div>
+</div>
 </section>
-<section id="correlation" class="slide level3" data-number="5.2.5">
-<h3><span class="header-section-number">5.2.5</span> Correlation</h3>
+<section id="describe-tables-with-pandas" class="slide level3" data-number="5.3.3">
+<h3><span class="header-section-number">5.3.3</span> Describe tables with pandas</h3>
+<div class="columns">
+<div class="column" style="width:37.5%;">
+<ul>
+<li>To get an overview of a table, you can use the <code>describe</code> method</li>
+<li>By default, <code>describe</code> computes 8 summary figures for numeric columns (count, mean, standard deviation, min, max and the three quartiles)</li>
+<li>The argument <code>include='all'</code> adds an overview of categorical variables</li>
+</ul>
+</div><div class="column" style="width:5%;">
+
+</div><div class="column" style="width:57.5%;">
+<h5 id="example-dvd-rental">Example: DVD Rental</h5>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1"></a><span class="co"># Import tables</span></span>
+<span id="cb1-2"><a href="#cb1-2"></a><span class="im">import</span> pandas <span class="im">as</span> pd</span>
+<span id="cb1-3"><a href="#cb1-3"></a>dvdrental <span class="op">=</span> get_all_tables(<span class="st">"dvdrental"</span>)</span>
+<span id="cb1-4"><a href="#cb1-4"></a>dvdrental.list_tables()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output cell-output-stdout">
+<pre><code>['film', 'actor', 'address', 'category', 'city', 'country', 'customer', 'film_actor', 'film_category', 'inventory', 'language', 'payment', 'rental', 'staff', 'store']</code></pre>
+</div>
+</div>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1"></a><span class="co"># Describe customer table</span></span>
+<span id="cb3-2"><a href="#cb3-2"></a>dvdrental.customer.describe(include<span class="op">=</span><span class="st">'all'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output cell-output-stdout">
+<pre><code>        customer_id    store_id  ...                 last_update      active
+count    599.000000  599.000000  ...                         599  599.000000
+unique          NaN         NaN  ...                           1         NaN
+top             NaN         NaN  ...  2013-05-26 14:49:45.738000         NaN
+freq            NaN         NaN  ...                         599         NaN
+first           NaN         NaN  ...  2013-05-26 14:49:45.738000         NaN
+last            NaN         NaN  ...  2013-05-26 14:49:45.738000         NaN
+mean     300.000000    1.455760  ...                         NaN    0.974958
+std      173.060683    0.498455  ...                         NaN    0.156382
+min        1.000000    1.000000  ...                         NaN    0.000000
+25%      150.500000    1.000000  ...                         NaN    1.000000
+50%      300.000000    1.000000  ...                         NaN    1.000000
+75%      449.500000    2.000000  ...                         NaN    1.000000
+max      599.000000    2.000000  ...                         NaN    1.000000
+
+[13 rows x 10 columns]</code></pre>
+</div>
+</div>
+</div>
+</div>
 </section>
-<section id="non-linear-relationships" class="slide level3" data-number="5.2.6">
-<h3><span class="header-section-number">5.2.6</span> Non-linear relationships</h3>
+<section id="mergejoin-tables-with-pandas" class="slide level3" data-number="5.3.4">
+<h3><span class="header-section-number">5.3.4</span> Merge/join tables with pandas</h3>
+<ul>
+<li>To join two tables, the <code>merge</code> method from Python’s pandas library can be used:</li>
+<li><strong>Syntax &amp; Parameters:</strong>
+<ul>
+<li><code>pd.merge(left, right, how='inner', on=None, left_on=None, right_on=None, ...)</code></li>
+<li><code>left</code>: First DataFrame to join.</li>
+<li><code>right</code>: Second DataFrame to join.</li>
+<li><code>how</code>: Type of merge to perform (e.g., ‘inner’, ‘outer’, ‘left’, ‘right’).</li>
+<li><code>on</code>: Column name(s) to join on. Must be found in both DataFrames.</li>
+<li><code>left_on</code>: Column(s) from the left DataFrame to use as keys.</li>
+<li><code>right_on</code>: Column(s) from the right DataFrame to use as keys.</li>
+</ul></li>
+<li><strong>Join Types:</strong>
+<ul>
+<li><code>inner</code>: Only rows with matching keys in both DataFrames are included.</li>
+<li><code>outer</code>: All rows from both DataFrames are included, with NaN for missing keys.</li>
+<li><code>left</code>: All rows from the left DataFrame are included, along with matched rows from the right DataFrame.</li>
+<li><code>right</code>: All rows from the right DataFrame are included, along with matched rows from the left DataFrame.</li>
+</ul></li>
+</ul>
 </section>
-<section id="contingency" class="slide level3" data-number="5.2.7">
-<h3><span class="header-section-number">5.2.7</span> Contingency</h3>
+<section id="example-mergejoin-tables-with-pandas" class="slide level3" data-number="5.3.5">
+<h3><span class="header-section-number">5.3.5</span> Example: Merge/join tables with pandas</h3>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1"></a><span class="co"># Merge customer, rental and payment tables</span></span>
+<span id="cb5-2"><a href="#cb5-2"></a>df <span class="op">=</span> (</span>
+<span id="cb5-3"><a href="#cb5-3"></a>dvdrental.customer.merge(</span>
+<span id="cb5-4"><a href="#cb5-4"></a>  dvdrental.rental,</span>
+<span id="cb5-5"><a href="#cb5-5"></a>  on <span class="op">=</span> [<span class="st">'customer_id'</span>]</span>
+<span id="cb5-6"><a href="#cb5-6"></a>).merge(</span>
+<span id="cb5-7"><a href="#cb5-7"></a>  dvdrental.payment,</span>
+<span id="cb5-8"><a href="#cb5-8"></a>  on <span class="op">=</span> [<span class="st">'rental_id'</span>, <span class="st">'customer_id'</span>]</span>
+<span id="cb5-9"><a href="#cb5-9"></a>)</span>
+<span id="cb5-10"><a href="#cb5-10"></a>)</span>
+<span id="cb5-11"><a href="#cb5-11"></a></span>
+<span id="cb5-12"><a href="#cb5-12"></a>df.describe(include<span class="op">=</span><span class="st">'all'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output cell-output-stdout">
+<pre><code>         customer_id      store_id  ...        amount                payment_date
+count   14592.000000  14592.000000  ...  14592.000000                       14592
+unique           NaN           NaN  ...           NaN                       14362
+top              NaN           NaN  ...           NaN  2007-05-14 13:44:29.996577
+freq             NaN           NaN  ...           NaN                         182
+first            NaN           NaN  ...           NaN  2007-02-14 21:21:59.996577
+last             NaN           NaN  ...           NaN  2007-05-14 13:44:29.996577
+mean      297.369929      1.454701  ...      4.201143                         NaN
+std       174.186580      0.497961  ...      2.368979                         NaN
+min         1.000000      1.000000  ...      0.000000                         NaN
+25%       145.000000      1.000000  ...      2.990000                         NaN
+50%       295.000000      1.000000  ...      3.990000                         NaN
+75%       449.000000      2.000000  ...      4.990000                         NaN
+max       599.000000      2.000000  ...     11.990000                         NaN
+
+[13 rows x 20 columns]</code></pre>
+</div>
+</div>
 </section>
-<section id="difference-in-differences" class="slide level3" data-number="5.2.8">
-<h3><span class="header-section-number">5.2.8</span> Difference-in-Differences</h3>
+<section id="aggregate-tables-with-pandas" class="slide level3" data-number="5.3.6">
+<h3><span class="header-section-number">5.3.6</span> Aggregate tables with pandas</h3>
+<ul>
+<li><p>In pandas, the <code>agg</code> function is used for aggregation operations, allowing for multiple statistics to be calculated simultaneously.</p></li>
+<li><p><strong>Syntax &amp; Parameters:</strong></p>
+<ul>
+<li><code>DataFrame.agg(func, axis=0, *args, **kwargs)</code></li>
+<li><code>func</code>: Function, string function name, list of functions, or dict of column names to functions.</li>
+<li><code>axis</code>: Axis along which the function is applied (0 for columns, 1 for rows).</li>
+</ul></li>
+<li><p><strong>Function Input Varieties:</strong></p>
+<ul>
+<li>Single function (e.g., <code>'sum'</code>)</li>
+<li>List of functions (e.g., <code>['sum', 'mean']</code>)</li>
+<li>Dictionary mapping columns to functions (e.g., <code>{'col1': 'sum', 'col2': ['mean', 'std']}</code>)</li>
+</ul></li>
+<li><p><strong>Behavior with GroupBy:</strong></p>
+<ul>
+<li>Often used in conjunction with <code>groupby</code> to perform grouped aggregations.</li>
+</ul></li>
+</ul>
 </section>
-<section id="causality" class="slide level3" data-number="5.2.9">
-<h3><span class="header-section-number">5.2.9</span> Causality</h3>
+<section id="example-aggregate-tables-with-pandas" class="slide level3" data-number="5.3.7">
+<h3><span class="header-section-number">5.3.7</span> Example: Aggregate tables with pandas</h3>
+<div class="columns">
+<div class="column" style="width:47.5%;">
+<div class="cell">
+<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1"></a><span class="co"># Merge rental, film and actor</span></span>
+<span id="cb7-2"><a href="#cb7-2"></a>df <span class="op">=</span> (</span>
+<span id="cb7-3"><a href="#cb7-3"></a>dvdrental.rental</span>
+<span id="cb7-4"><a href="#cb7-4"></a>.merge(dvdrental.inventory, on <span class="op">=</span> [<span class="st">'inventory_id'</span>])</span>
+<span id="cb7-5"><a href="#cb7-5"></a>.merge(dvdrental.film, on <span class="op">=</span> [<span class="st">'film_id'</span>])</span>
+<span id="cb7-6"><a href="#cb7-6"></a>.merge(dvdrental.film_actor, on <span class="op">=</span> [<span class="st">'film_id'</span>])</span>
+<span id="cb7-7"><a href="#cb7-7"></a>.merge(dvdrental.actor, on <span class="op">=</span> [<span class="st">'actor_id'</span>])</span>
+<span id="cb7-8"><a href="#cb7-8"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+</div><div class="column" style="width:5%;">
+
+</div><div class="column" style="width:47.5%;">
+<div class="cell">
+<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1"></a><span class="co"># Count rentals per actor</span></span>
+<span id="cb8-2"><a href="#cb8-2"></a>(</span>
+<span id="cb8-3"><a href="#cb8-3"></a>  df</span>
+<span id="cb8-4"><a href="#cb8-4"></a>  .groupby(by <span class="op">=</span> [<span class="st">'actor_id'</span>, <span class="st">'last_name'</span>, <span class="st">'first_name'</span>], </span>
+<span id="cb8-5"><a href="#cb8-5"></a>           as_index <span class="op">=</span> <span class="va">False</span>)</span>
+<span id="cb8-6"><a href="#cb8-6"></a>  .agg(count_rentals<span class="op">=</span>(<span class="st">'rental_id'</span>, <span class="st">'count'</span>))</span>
+<span id="cb8-7"><a href="#cb8-7"></a>  .sort_values(by <span class="op">=</span> <span class="st">'count_rentals'</span>, ascending<span class="op">=</span><span class="va">False</span>)</span>
+<span id="cb8-8"><a href="#cb8-8"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output cell-output-stdout">
+<pre><code>     actor_id    last_name first_name  count_rentals
+106       107    Degeneres       Gina            753
+180       181       Carrey    Matthew            678
+197       198       Keitel       Mary            674
+143       144  Witherspoon     Angela            654
+101       102         Torn     Walter            640
+..        ...          ...        ...            ...
+34         35         Dean       Judy            255
+198       199      Fawcett      Julia            255
+30         31     Sobieski      Sissy            235
+185       186    Zellweger      Julia            221
+147       148          Dee      Emily            216
+
+[200 rows x 4 columns]</code></pre>
+</div>
+</div>
+</div>
+</div>
 </section></section>
 <section>
-<section id="best-practices" class="title-slide slide level2 center" data-background-color="#0014a0" data-number="5.3">
-<h2><span class="header-section-number">5.3</span> Best practices</h2>
+<section id="variation" class="title-slide slide level2 center" data-background-color="#0014a0" data-number="5.4">
+<h2><span class="header-section-number">5.4</span> Variation</h2>
 <div class="footer">
 
 </div>
 </section>
-<section id="definition" class="slide level3" data-number="5.3.1">
-<h3><span class="header-section-number">5.3.1</span> Definition</h3>
+<section id="variation-1" class="slide level3" data-number="5.4.1">
+<h3><span class="header-section-number">5.4.1</span> Variation</h3>
+<ul>
+<li><strong>Variation Definition:</strong> Tendency of variable values to change with each measurement.</li>
+<li><strong>Continuous Variables:</strong> Each measurement likely yields slightly different results.</li>
+<li><strong>Constant Quantities:</strong> Even with constants like the speed of light, measurements vary due to errors.</li>
+<li><strong>Categorical Variables Variation:</strong>
+<ul>
+<li>Different subjects, e.g., various people’s eye colors.</li>
+<li>Different times, e.g., an electron’s energy levels at varied moments.</li>
+</ul></li>
+<li><strong>Pattern of Variation:</strong> Each variable has a unique pattern that can provide insightful information.</li>
+<li><strong>Understanding Patterns:</strong> Visualizing a variable’s distribution helps understand its variation pattern.</li>
+</ul>
+</section>
+<section id="histograms" class="slide level3" data-number="5.4.2">
+<h3><span class="header-section-number">5.4.2</span> Histograms</h3>
+<div class="columns">
+<div class="column" style="width:47.5%;">
+<ul>
+<li>A <strong>histogram</strong> visualizes the distribution of continuous variables</li>
+<li>A bar for each bin represents the frequency of observations in this bin</li>
+<li>Bins can be specified in different variants (e.g.&nbsp;same width vs.&nbsp;varying width)</li>
+</ul>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb10"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1"></a><span class="co"># Compute revenue per customer</span></span>
+<span id="cb10-2"><a href="#cb10-2"></a>df <span class="op">=</span> (</span>
+<span id="cb10-3"><a href="#cb10-3"></a>dvdrental.customer</span>
+<span id="cb10-4"><a href="#cb10-4"></a>.merge(dvdrental.rental, on <span class="op">=</span> [<span class="st">'customer_id'</span>])</span>
+<span id="cb10-5"><a href="#cb10-5"></a>.merge(dvdrental.payment, on <span class="op">=</span> [<span class="st">'rental_id'</span>, <span class="st">'customer_id'</span>])</span>
+<span id="cb10-6"><a href="#cb10-6"></a>.groupby(<span class="st">'customer_id'</span>)</span>
+<span id="cb10-7"><a href="#cb10-7"></a>.agg(revenue<span class="op">=</span>(<span class="st">'amount'</span>, <span class="st">'sum'</span>))</span>
+<span id="cb10-8"><a href="#cb10-8"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+</div><div class="column" style="width:5%;">
+
+</div><div class="column" style="width:47.5%;">
+<p>The <code>matplotlib</code> library offers a function <code>hist</code> to draw histograms:</p>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1"></a><span class="im">import</span> matplotlib.pyplot <span class="im">as</span> plt</span>
+<span id="cb11-2"><a href="#cb11-2"></a>plt.hist(df[<span class="st">'revenue'</span>], bins <span class="op">=</span> <span class="dv">10</span>)</span>
+<span id="cb11-3"><a href="#cb11-3"></a>plt.show()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output-display">
+<div id="fig-hist" class="quarto-figure quarto-figure-center">
+<figure>
+<p><img data-src="04_data_analysis_files/figure-revealjs/fig-hist-1.png" width="614"></p>
+<figcaption>Figure&nbsp;5.4: Histogram of revenues per customer in the DVD Rental data base</figcaption>
+</figure>
+</div>
+</div>
+</div>
+</div>
+</div>
+</section>
+<section id="bar-plots" class="slide level3" data-number="5.4.3">
+<h3><span class="header-section-number">5.4.3</span> Bar plots</h3>
+<div class="columns">
+<div class="column" style="width:47.5%;">
+<ul>
+<li>In a <em>bar plot</em>, the height of a bar represents the frequency of values of a categorical variable</li>
+<li>Bars can show absolute or relative frequencies</li>
+</ul>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb12"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1"></a><span class="co"># Compute rentals per staff member</span></span>
+<span id="cb12-2"><a href="#cb12-2"></a>df <span class="op">=</span> (</span>
+<span id="cb12-3"><a href="#cb12-3"></a>dvdrental.customer</span>
+<span id="cb12-4"><a href="#cb12-4"></a>.merge(dvdrental.rental, on <span class="op">=</span> [<span class="st">'customer_id'</span>], </span>
+<span id="cb12-5"><a href="#cb12-5"></a>       suffixes <span class="op">=</span> (<span class="st">'_customer'</span>, <span class="st">'_rental'</span>))</span>
+<span id="cb12-6"><a href="#cb12-6"></a>.merge(dvdrental.staff, on <span class="op">=</span> [<span class="st">'staff_id'</span>],</span>
+<span id="cb12-7"><a href="#cb12-7"></a>      suffixes <span class="op">=</span> (<span class="st">'_customer'</span>, <span class="st">'_staff'</span>))</span>
+<span id="cb12-8"><a href="#cb12-8"></a>.groupby([<span class="st">'staff_id'</span>, <span class="st">'first_name_staff'</span>, </span>
+<span id="cb12-9"><a href="#cb12-9"></a>          <span class="st">'last_name_staff'</span>, <span class="st">'email_staff'</span>], </span>
+<span id="cb12-10"><a href="#cb12-10"></a>          as_index<span class="op">=</span><span class="va">False</span>)</span>
+<span id="cb12-11"><a href="#cb12-11"></a>.agg(rentals_count<span class="op">=</span>(<span class="st">'rental_id'</span>, <span class="st">'count'</span>))</span>
+<span id="cb12-12"><a href="#cb12-12"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+</div><div class="column" style="width:5%;">
+
+</div><div class="column" style="width:47.5%;">
+<p>The <code>matplotlib</code> library offers a function <code>bar</code> to draw bar plots:</p>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1"></a>plt.bar(df[<span class="st">'first_name_staff'</span>], df[<span class="st">'rentals_count'</span>])</span>
+<span id="cb13-2"><a href="#cb13-2"></a>plt.show()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output-display">
+<div id="fig-bar" class="quarto-figure quarto-figure-center">
+<figure>
+<p><img data-src="04_data_analysis_files/figure-revealjs/fig-bar-3.png" width="960"></p>
+<figcaption>Figure&nbsp;5.5: Barplot of count of rentals per staff member in the DVD Rental data base</figcaption>
+</figure>
+</div>
+</div>
+</div>
+</div>
+</div>
+</section>
+<section id="analyzing-variationdistributions" class="slide level3" data-number="5.4.4">
+<h3><span class="header-section-number">5.4.4</span> Analyzing variation/distributions</h3>
+<div class="columns">
+<div class="column" style="width:47.5%;">
+<h5 id="questions-to-ask">Questions to ask:</h5>
+<ol type="1">
+<li>Which values are the most common? Why?</li>
+<li>Which values are rare? Why? Does that match your expectations?</li>
+<li>Can you see any unusual patterns? What might explain them?</li>
+</ol>
+</div><div class="column" style="width:5%;">
+
+</div><div class="column" style="width:47.5%;">
+<h5 id="kpis-describing-the-distribution">KPIs describing the distribution:</h5>
+<ul>
+<li>Frequencies (absolute, relative)</li>
+<li>Average</li>
+<li>Standard Deviation, Variance, Median Absolute Deviation</li>
+<li>Skewness, Kurtosis</li>
+<li>Outliers</li>
+</ul>
+</div>
+</div>
 </section></section>
 <section>
-<section id="exercise" class="title-slide slide level2 center" data-background-color="#0014a0" data-number="5.4">
-<h2><span class="header-section-number">5.4</span> Exercise</h2>
+<section id="covaration" class="title-slide slide level2 center" data-background-color="#0014a0" data-number="5.5">
+<h2><span class="header-section-number">5.5</span> Covariation</h2>
+
+</section>
+<section id="covariation" class="slide level3" data-number="5.5.1">
+<h3><span class="header-section-number">5.5.1</span> Covariation</h3>
+<ul>
+<li><strong>Variation:</strong> describes the behavior within a variable</li>
+<li><strong>Covariation:</strong> describes the behavior between variables</li>
+<li>Covariation is the tendency for the values of two or more variables to vary together in a related way</li>
+<li>The best way to spot covariation is to visualise the relationship between two or more variables</li>
+<li>How you do that should again depend on the <strong>type</strong> of variables involved.</li>
+</ul>
+</section>
+<section id="covariation-of-a-cagtegorical-and-a-continuous-variable-12" class="slide level3" data-number="5.5.2">
+<h3><span class="header-section-number">5.5.2</span> Covariation of a categorical and a continuous variable (1/2)</h3>
+<div class="columns">
+<div class="column" style="width:47.5%;">
+<ul>
+<li>A <em>bar plot</em> can be used to compare aggregated values of a numeric variable across categories of another variable</li>
+<li>Aggregations can be counts, sums, …</li>
+</ul>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb14"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1"></a><span class="co"># Compute revenue per staff member</span></span>
+<span id="cb14-2"><a href="#cb14-2"></a>df <span class="op">=</span> (</span>
+<span id="cb14-3"><a href="#cb14-3"></a>dvdrental.customer</span>
+<span id="cb14-4"><a href="#cb14-4"></a>.merge(dvdrental.rental, on <span class="op">=</span> [<span class="st">'customer_id'</span>])</span>
+<span id="cb14-5"><a href="#cb14-5"></a>.merge(dvdrental.payment, on <span class="op">=</span> [<span class="st">'customer_id'</span>,<span class="st">'rental_id'</span>, <span class="st">'staff_id'</span>])</span>
+<span id="cb14-6"><a href="#cb14-6"></a>.merge(dvdrental.staff, on <span class="op">=</span> [<span class="st">'staff_id'</span>], suffixes <span class="op">=</span> (<span class="st">'_customer'</span>,<span class="st">'_staff'</span>))</span>
+<span id="cb14-7"><a href="#cb14-7"></a>.groupby([<span class="st">'staff_id'</span>, <span class="st">'first_name_staff'</span>, </span>
+<span id="cb14-8"><a href="#cb14-8"></a>          <span class="st">'last_name_staff'</span>, <span class="st">'email_staff'</span>], </span>
+<span id="cb14-9"><a href="#cb14-9"></a>          as_index<span class="op">=</span><span class="va">False</span>)</span>
+<span id="cb14-10"><a href="#cb14-10"></a>.agg(revenue<span class="op">=</span>(<span class="st">'amount'</span>, <span class="st">'sum'</span>))</span>
+<span id="cb14-11"><a href="#cb14-11"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+</div><div class="column" style="width:5%;">
+
+</div><div class="column" style="width:47.5%;">
+<div class="cell">
+<div class="sourceCode cell-code" id="cb15"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb15-1"><a href="#cb15-1"></a>plt.bar(df[<span class="st">'first_name_staff'</span>], df[<span class="st">'revenue'</span>])</span>
+<span id="cb15-2"><a href="#cb15-2"></a>plt.show()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output-display">
+<div id="fig-covarbars" class="quarto-figure quarto-figure-center">
+<figure>
+<p><img data-src="04_data_analysis_files/figure-revealjs/fig-covarbars-5.png" width="960"></p>
+<figcaption>Figure&nbsp;5.6: Barplot of revenue per staff member in the DVD Rental data base</figcaption>
+</figure>
+</div>
+</div>
+</div>
+</div>
+</div>
+</section>
+<section id="covariation-of-a-cagtegorical-and-a-continuous-variable-22" class="slide level3" data-number="5.5.3">
+<h3><span class="header-section-number">5.5.3</span> Covariation of a categorical and a continuous variable (2/2)</h3>
+<div class="columns">
+<div class="column" style="width:47.5%;">
+<ul>
+<li>Using <strong>boxplots</strong>, the entire distribution of a continuous variable can be compared across categories of another variable</li>
+</ul>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb16"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1"></a><span class="co"># Merge tables for revenues</span></span>
+<span id="cb16-2"><a href="#cb16-2"></a>df <span class="op">=</span> (</span>
+<span id="cb16-3"><a href="#cb16-3"></a>dvdrental.customer</span>
+<span id="cb16-4"><a href="#cb16-4"></a>.merge(dvdrental.rental, on <span class="op">=</span> [<span class="st">'customer_id'</span>], </span>
+<span id="cb16-5"><a href="#cb16-5"></a>       suffixes <span class="op">=</span> (<span class="st">'_customer'</span>, <span class="st">'_rental'</span>))</span>
+<span id="cb16-6"><a href="#cb16-6"></a>.merge(dvdrental.payment, on <span class="op">=</span> [<span class="st">'customer_id'</span>, </span>
+<span id="cb16-7"><a href="#cb16-7"></a>                                <span class="st">'rental_id'</span>, </span>
+<span id="cb16-8"><a href="#cb16-8"></a>                                <span class="st">'staff_id'</span>], </span>
+<span id="cb16-9"><a href="#cb16-9"></a>       suffixes <span class="op">=</span> (<span class="st">'_customer'</span>, <span class="st">'_rental'</span>))</span>
+<span id="cb16-10"><a href="#cb16-10"></a>.merge(dvdrental.staff, on <span class="op">=</span> [<span class="st">'staff_id'</span>],</span>
+<span id="cb16-11"><a href="#cb16-11"></a>      suffixes <span class="op">=</span> (<span class="st">'_customer'</span>, <span class="st">'_staff'</span>))    </span>
+<span id="cb16-12"><a href="#cb16-12"></a>)[[<span class="st">'first_name_staff'</span>, <span class="st">'amount'</span>]]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+</div><div class="column" style="width:5%;">
+
+</div><div class="column" style="width:47.5%;">
+<p>The <code>seaborn</code> library offers a function <code>sns.boxplot</code> to easily draw multiple boxplots for comparison:</p>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb17"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1"></a><span class="im">import</span> seaborn <span class="im">as</span> sns</span>
+<span id="cb17-2"><a href="#cb17-2"></a>sns.boxplot(x <span class="op">=</span> <span class="st">'first_name_staff'</span>, y <span class="op">=</span> <span class="st">'amount'</span>, data <span class="op">=</span> df)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output-display">
+<div id="fig-boxs" class="quarto-figure quarto-figure-center">
+<figure>
+<p><img data-src="04_data_analysis_files/figure-revealjs/fig-boxs-7.png" width="960"></p>
+<figcaption>Figure&nbsp;5.7: Boxplots of revenues per rental separately by staff members</figcaption>
+</figure>
+</div>
+</div>
+</div>
+</div>
+</div>
+</section>
+<section id="covariation-of-two-cagtegorical-variables" class="slide level3" data-number="5.5.4">
+<h3><span class="header-section-number">5.5.4</span> Covariation of two categorical variables</h3>
+<div class="columns">
+<div class="column" style="width:47.5%;">
+<p>Create a table to identify the number of rentals per film category and country:</p>
+<div class="cell">
+<div class="cell-output cell-output-stdout">
+<pre><code>      country         name  count
+0   Australia       Action    516
+1   Australia    Animation    598
+2   Australia     Children    492
+3   Australia     Classics    492
+4   Australia       Comedy    439
+5   Australia  Documentary    600
+6   Australia        Drama    484
+7   Australia       Family    539
+8   Australia      Foreign    509
+9   Australia        Games    514
+10  Australia       Horror    460
+11  Australia        Music    394
+12  Australia          New    438
+13  Australia       Sci-Fi    580
+14  Australia       Sports    624
+15  Australia       Travel    442
+16     Canada       Action    596
+17     Canada    Animation    568
+18     Canada     Children    453
+19     Canada     Classics    447
+20     Canada       Comedy    502
+21     Canada  Documentary    450
+22     Canada        Drama    576
+23     Canada       Family    557
+24     Canada      Foreign    524
+25     Canada        Games    455
+26     Canada       Horror    386
+27     Canada        Music    436
+28     Canada          New    502
+29     Canada       Sci-Fi    521
+30     Canada       Sports    555
+31     Canada       Travel    395</code></pre>
+</div>
+</div>
+</div><div class="column" style="width:5%;">
 
+</div><div class="column" style="width:47.5%;">
+<p>The <code>seaborn</code> library offers a function <code>heatmap</code> to create heatmaps:</p>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb19"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb19-1"><a href="#cb19-1"></a>df_pivot <span class="op">=</span> df.pivot(index<span class="op">=</span><span class="st">"name"</span>, columns<span class="op">=</span><span class="st">"country"</span>, values<span class="op">=</span><span class="st">"count"</span>)</span>
+<span id="cb19-2"><a href="#cb19-2"></a>sns.heatmap(df_pivot)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output-display">
+<div id="fig-heatmap" class="quarto-figure quarto-figure-center">
+<figure>
+<p><img data-src="04_data_analysis_files/figure-revealjs/fig-heatmap-9.png" width="960"></p>
+<figcaption>Figure&nbsp;5.8: Heatmap for rentals across film categories and countries</figcaption>
+</figure>
+</div>
+</div>
+</div>
+</div>
+</div>
 </section>
-<section id="exercise-1" class="slide level3" data-number="5.4.1">
-<h3><span class="header-section-number">5.4.1</span> Exercise</h3>
-<div class="callout callout-caution callout-titled callout-style-default">
-<div class="callout-body">
-<div class="callout-title">
-<div class="callout-icon-container">
-<i class="callout-icon"></i>
+<section id="covariation-of-two-continuous-variables" class="slide level3" data-number="5.5.5">
+<h3><span class="header-section-number">5.5.5</span> Covariation of two continuous variables</h3>
+<div class="columns">
+<div class="column" style="width:47.5%;">
+<p>Create a table with duration and amount paid</p>
+<div class="cell">
+<div class="cell-output cell-output-stdout">
+<pre><code>      rental_id  duration  amount
+0          1158  3.865278    2.99
+1          1164  7.964583    0.99
+2          1165  9.110417    3.99
+3          1166  8.227778    4.99
+4          1169  2.100000    2.99
+...         ...       ...     ...
+7263      16039  5.054861    2.99
+7264      16040  4.074306   11.99
+7265      16042  8.857639    2.99
+7266      16045  9.169444    0.99
+7267      16049  2.240278    3.99
+
+[7268 rows x 3 columns]</code></pre>
+</div>
+</div>
+</div><div class="column" style="width:5%;">
+
+</div><div class="column" style="width:47.5%;">
+<p>The <code>pandas</code> library offers a method <code>plot.scatter</code> to create scatterplots:</p>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb21"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb21-1"><a href="#cb21-1"></a>df.plot.scatter(x <span class="op">=</span> <span class="st">'duration'</span>, y <span class="op">=</span> <span class="st">'amount'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output-display">
+<div id="fig-scatter" class="quarto-figure quarto-figure-center">
+<figure>
+<p><img data-src="04_data_analysis_files/figure-revealjs/fig-scatter-11.png" width="960"></p>
+<figcaption>Figure&nbsp;5.9: Scatterplot for rental duration and amount paid</figcaption>
+</figure>
 </div>
-<p><strong>Exercise</strong></p>
 </div>
-<div class="callout-content">
-<p>Please analyze the following use case</p>
 </div>
 </div>
+</div>
+</section>
+<section id="analyzing-covariationrelationshipsdependencies" class="slide level3" data-number="5.5.6">
+<h3><span class="header-section-number">5.5.6</span> Analyzing covariation/relationships/dependencies</h3>
+<div class="columns">
+<div class="column" style="width:47.5%;">
+<h5 id="questions-to-ask-1">Questions to ask:</h5>
+<ol type="1">
+<li>Which variables show common patterns? Are they reasonable/explainable?</li>
+<li>What form does the relationship have: is it linear, non-linear?</li>
+</ol>
+</div><div class="column" style="width:5%;">
+
+</div><div class="column" style="width:47.5%;">
+<h5 id="kpis-describing-the-distribution-1">KPIs describing the relationship:</h5>
+<ul>
+<li>Correlation (Pearson, Spearman, Kendall)</li>
+<li>Contingency (<span class="math inline">\chi^2</span>, Cramér’s <span class="math inline">V</span>)</li>
+<li>…</li>
+</ul>
+</div>
+</div>
+</section>
+<section id="looking-back-data-preparation" class="slide level3" data-number="5.5.7">
+<h3><span class="header-section-number">5.5.7</span> Looking back: Data preparation</h3>
+
+<img data-src="img/featureengineering2.png" class="r-stretch quarto-figure-center"><p class="caption">Figure&nbsp;5.10: Visualization of the data preparation tasks</p></section>
+<section id="feature-engineering" class="slide level3" data-number="5.5.8">
+<h3><span class="header-section-number">5.5.8</span> Feature engineering</h3>
+
+<img data-src="img/featureengineering.png" class="r-stretch quarto-figure-center"><p class="caption">Figure&nbsp;5.11: Visualization of feature engineering tasks</p><div class="footer">
+
 </div>
 </section></section>
 <section id="references" class="title-slide slide level2 unnumbered scrollable smaller">
 <h2>References</h2>
-<div id="refs" role="list">
-
+<div id="refs" class="references csl-bib-body hanging-indent" role="list">
+<div id="ref-Wickham2017R" class="csl-entry" role="listitem">
+Wickham, Hadley, and Garrett Grolemund. 2017. <em>R for Data Science: Import, Tidy, Transform, Visualize, and Model Data</em>. 1st ed. O’Reilly Media. <a href="http://r4ds.had.co.nz/">http://r4ds.had.co.nz/</a>.
+</div>
 </div>
 
 <img src="img/logo.png" class="slide-logo r-stretch"><div class="footer footer-default">
diff --git a/output/04_data_analysis_files/figure-revealjs/fig-bar-3.png b/output/04_data_analysis_files/figure-revealjs/fig-bar-3.png
new file mode 100644
index 0000000000000000000000000000000000000000..74f428ba1b3983c4854d3115a16ddb773c1d92b2
Binary files /dev/null and b/output/04_data_analysis_files/figure-revealjs/fig-bar-3.png differ
diff --git a/output/04_data_analysis_files/figure-revealjs/fig-bar-5.png b/output/04_data_analysis_files/figure-revealjs/fig-bar-5.png
new file mode 100644
index 0000000000000000000000000000000000000000..74f428ba1b3983c4854d3115a16ddb773c1d92b2
Binary files /dev/null and b/output/04_data_analysis_files/figure-revealjs/fig-bar-5.png differ
diff --git a/output/04_data_analysis_files/figure-revealjs/fig-boxs-3.png b/output/04_data_analysis_files/figure-revealjs/fig-boxs-3.png
new file mode 100644
index 0000000000000000000000000000000000000000..4b8fde2d151be5b30e4d2803a304ba0def0d2914
Binary files /dev/null and b/output/04_data_analysis_files/figure-revealjs/fig-boxs-3.png differ
diff --git a/output/04_data_analysis_files/figure-revealjs/fig-boxs-7.png b/output/04_data_analysis_files/figure-revealjs/fig-boxs-7.png
new file mode 100644
index 0000000000000000000000000000000000000000..4b8fde2d151be5b30e4d2803a304ba0def0d2914
Binary files /dev/null and b/output/04_data_analysis_files/figure-revealjs/fig-boxs-7.png differ
diff --git a/output/04_data_analysis_files/figure-revealjs/fig-covarbars-5.png b/output/04_data_analysis_files/figure-revealjs/fig-covarbars-5.png
new file mode 100644
index 0000000000000000000000000000000000000000..eaecd87b18e6a6b62d1cac6bb2232b569511b642
Binary files /dev/null and b/output/04_data_analysis_files/figure-revealjs/fig-covarbars-5.png differ
diff --git a/output/04_data_analysis_files/figure-revealjs/fig-heatmap-9.png b/output/04_data_analysis_files/figure-revealjs/fig-heatmap-9.png
new file mode 100644
index 0000000000000000000000000000000000000000..23fbfa8a33aa00811331d35f804a2526905f7aa3
Binary files /dev/null and b/output/04_data_analysis_files/figure-revealjs/fig-heatmap-9.png differ
diff --git a/output/04_data_analysis_files/figure-revealjs/fig-hist-1.png b/output/04_data_analysis_files/figure-revealjs/fig-hist-1.png
new file mode 100644
index 0000000000000000000000000000000000000000..4d396c98a31acdb4bddf2cc161a54648d46feacb
Binary files /dev/null and b/output/04_data_analysis_files/figure-revealjs/fig-hist-1.png differ
diff --git a/output/04_data_analysis_files/figure-revealjs/fig-scatter-11.png b/output/04_data_analysis_files/figure-revealjs/fig-scatter-11.png
new file mode 100644
index 0000000000000000000000000000000000000000..665e4c669e8fe688b34f92199a9e3e5b08d52997
Binary files /dev/null and b/output/04_data_analysis_files/figure-revealjs/fig-scatter-11.png differ
diff --git a/output/img/featureengineering2.png b/output/img/featureengineering2.png
new file mode 100644
index 0000000000000000000000000000000000000000..dddc6709391df37c76c8dac78fc9ff0e0db544cc
Binary files /dev/null and b/output/img/featureengineering2.png differ
diff --git a/references.bib b/references.bib
index e9b0e6b2ee173c1c5e03dc245a7d11ce63da2707..4e88ee0c6f1caad80d6811be3f3778ee807f8de3 100644
--- a/references.bib
+++ b/references.bib
@@ -91,4 +91,15 @@
   year = 2016
 }
 
+@book{Wickham2017R,
+  author = {Wickham, Hadley and Grolemund, Garrett},
+  edition = 1,
+  publisher = {O'Reilly Media},
+  title = {R for Data Science: Import, Tidy, Transform, Visualize, and Model Data},
+  url = {http://r4ds.had.co.nz/},
+  year = 2017
+}
+
+
+