From 5ac895c31a1070d8b6ebef4e094d45bfe0cceaf7 Mon Sep 17 00:00:00 2001 From: Kjetil Orbekk Date: Sat, 28 Oct 2017 08:50:27 -0400 Subject: DB normal form example --- db.Rmd | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 db.Rmd diff --git a/db.Rmd b/db.Rmd new file mode 100644 index 0000000..5df6a06 --- /dev/null +++ b/db.Rmd @@ -0,0 +1,80 @@ +# First normal form + +A relation is in first normal form if and only if the domain of each attribute +contains only atomic (indivisible) values, and the value of each attribute +contains only a single value from that domain. + +```{r result='asis'} +library(data.table) +library(knitr) + +tasks <- data.table( + task_id = 1:3, + description = c('Buy milk', 'Clean bedroom', 'Make coffee'), + owner = c('Alice,Bob', 'Bob', 'Alice') +) +setkey(tasks, task_id) + +kable(tasks) +``` + +The `tasks` table is not in 1NF because there can be multiple owners +of a task. + +```{r result='asis'} +owners <- tasks[, .(owner=unlist(strsplit(owner, ','))), by=.(task_id)] +setkey(owners, task_id, owner) + +kable(owners) +``` + +# Second normal form + +A relation is in 2NF if it is in 1NF and no non-prime attribute is dependent on +any proper subset of any candidate key of the relation. + +```{r result='asis'} +task_dependencies <- data.table( + task_id = 1:2, + depends_on = 3, + dependent_target_date = as.POSIXct(c('2017-10-29', '2017-11-30')) +) +setkey(task_dependencies, task_id, depends_on) +kable(task_dependencies) +``` + +This is not 2NF because `dependent_target_date` is dependent on a subset of the +key (`depends_on`). Fix by moving target_date to the tasks table. + +```{r result='asis'} +target_dates <- as.POSIXct(c('2017-10-29', '2017-11-30', '2017-10-28')) +kable(tasks[1:3, target_date := target_dates][]) +kable(task_dependencies[, dependent_target_date := NULL][]) +``` + +We can still get the target dates by joining: + +```{r result='asis'} +kable(tasks[task_dependencies, on=.(task_id = depends_on)]) +``` + +# Third normal form + +(1) the entity is in second normal form, and (2) all the attributes in a table +are determined only by the candidate keys of that relation and not by any +non-prime attributes + +```{r result='asis'} +tasks[1:3, `:=`(category = c('shopping', 'house work', 'cooking'), + category_location = c('in a store', 'home', 'home'))] +kable(tasks) +``` + +Not 3NF because category_location is determined by category, which is +not the primary key. + +```{r result='asis'} +categories <- tasks[, .(category, location = category_location)] +kable(categories) +kable(tasks[, `:=`(category = NULL)][]) +``` \ No newline at end of file -- cgit v1.2.3