class: title-slide <br> <br> .right-panel[ # Joining Data ## Dr. Mine Dogucu ] --- class: middle <img src="img/data_joins.png" width="340" /> .font15[Image from [R for Data Science book](https://r4ds.had.co.nz/index.html) licensed under the [Creative Commons Attribution-NonCommercial-NoDerivs 3.0 License](https://creativecommons.org/licenses/by-nc-nd/3.0/us/)] --- class: middle ## Inner Join <img src="img/inner_join.png" width="1056" /> .font15[Image from [R for Data Science book](https://r4ds.had.co.nz/index.html) licensed under the [Creative Commons Attribution-NonCommercial-NoDerivs 3.0 License](https://creativecommons.org/licenses/by-nc-nd/3.0/us/)] --- class: middle #### Outer Joins <img src="img/outer_joins.png" width="40%" /> .font15[Image from [R for Data Science book](https://r4ds.had.co.nz/index.html) licensed under the [Creative Commons Attribution-NonCommercial-NoDerivs 3.0 License](https://creativecommons.org/licenses/by-nc-nd/3.0/us/)] --- class: middle ## Data Joins <img src="img/join_venn.png" width="80%" /> .font15[Image from [R for Data Science book](https://r4ds.had.co.nz/index.html) licensed under the [Creative Commons Attribution-NonCommercial-NoDerivs 3.0 License](https://creativecommons.org/licenses/by-nc-nd/3.0/us/)] --- .panelset[ .panel[ .panel-name[artists] ```r artists ``` ``` ## # A tibble: 4 × 2 ## name followers ## <chr> <dbl> ## 1 Beyoncé 24757958 ## 2 Taylor Swift 33098116 ## 3 Ariana Grande 51807131 ## 4 Drake 50252529 ``` ] .panel[ .panel-name[songs] ```r songs ``` ``` ## # A tibble: 5 × 4 ## name song_name album_name song_popularity ## <chr> <chr> <chr> <dbl> ## 1 Beyoncé Savage Remix (feat. Beyoncé) Savage Rem… 83 ## 2 Taylor Swift cardigan folklore 85 ## 3 Drake Laugh Now Cry Later (feat. Lil Durk) Laugh Now … 95 ## 4 Beyoncé Halo I AM…SASHA… NA ## 5 Ariana Grande Stuck with U (with Justin Bieber) Stuck with… NA ``` ] .panel[ .panel-name[albums] ```r albums ``` ``` ## # A tibble: 4 × 2 ## album_name album_release_date ## <chr> <date> ## 1 Savage Remix (feat. Beyoncé) 2020-04-29 ## 2 I AM…SASHA FIERCE 2008-11-14 ## 3 Stuck with U 2020-05-08 ## 4 Laugh Now Cry Later (feat. Lil Durk) 2020-08-14 ``` ] ] --- class: middle <img src="img/data_joins_spotify.png" width="90%" /> --- class: middle ## Join Functions `something_join(x, y)` All the join functions in dplyr package are in this fashion where x represents the first data frame and y represents the second data frame. --- .panelset[ .panel[ .panel-name[x] ```r songs ``` ``` ## # A tibble: 5 × 4 ## name song_name album_name song_popularity ## <chr> <chr> <chr> <dbl> ## 1 Beyoncé Savage Remix (feat. Beyoncé) Savage Rem… 83 ## 2 Taylor Swift cardigan folklore 85 ## 3 Drake Laugh Now Cry Later (feat. Lil Durk) Laugh Now … 95 ## 4 Beyoncé Halo I AM…SASHA… NA ## 5 Ariana Grande Stuck with U (with Justin Bieber) Stuck with… NA ``` ] .panel[ .panel-name[y] ```r albums ``` ``` ## # A tibble: 4 × 2 ## album_name album_release_date ## <chr> <date> ## 1 Savage Remix (feat. Beyoncé) 2020-04-29 ## 2 I AM…SASHA FIERCE 2008-11-14 ## 3 Stuck with U 2020-05-08 ## 4 Laugh Now Cry Later (feat. Lil Durk) 2020-08-14 ``` ] .panel[ .panel-name[left_join()] ```r left_join(songs, albums, by = "album_name") ``` ``` ## # A tibble: 5 × 5 ## name song_name album_name song_popularity album_release_d… ## <chr> <chr> <chr> <dbl> <date> ## 1 Beyoncé Savage Remix (feat.… Savage Re… 83 2020-04-29 ## 2 Taylor Swift cardigan folklore 85 NA ## 3 Drake Laugh Now Cry Later… Laugh Now… 95 2020-08-14 ## 4 Beyoncé Halo I AM…SASH… NA 2008-11-14 ## 5 Ariana Grande Stuck with U (with … Stuck wit… NA 2020-05-08 ``` ] ] `left_join()` includes all rows from x --- .panelset[ .panel[ .panel-name[x] ```r songs ``` ``` ## # A tibble: 5 × 4 ## name song_name album_name song_popularity ## <chr> <chr> <chr> <dbl> ## 1 Beyoncé Savage Remix (feat. Beyoncé) Savage Rem… 83 ## 2 Taylor Swift cardigan folklore 85 ## 3 Drake Laugh Now Cry Later (feat. Lil Durk) Laugh Now … 95 ## 4 Beyoncé Halo I AM…SASHA… NA ## 5 Ariana Grande Stuck with U (with Justin Bieber) Stuck with… NA ``` ] .panel[ .panel-name[y] ```r albums ``` ``` ## # A tibble: 4 × 2 ## album_name album_release_date ## <chr> <date> ## 1 Savage Remix (feat. Beyoncé) 2020-04-29 ## 2 I AM…SASHA FIERCE 2008-11-14 ## 3 Stuck with U 2020-05-08 ## 4 Laugh Now Cry Later (feat. Lil Durk) 2020-08-14 ``` ] .panel[ .panel-name[right_join()] ```r right_join(songs, albums, by = "album_name") ``` ``` ## # A tibble: 4 × 5 ## name song_name album_name song_popularity album_release_d… ## <chr> <chr> <chr> <dbl> <date> ## 1 Beyoncé Savage Remix (feat.… Savage Re… 83 2020-04-29 ## 2 Drake Laugh Now Cry Later… Laugh Now… 95 2020-08-14 ## 3 Beyoncé Halo I AM…SASH… NA 2008-11-14 ## 4 Ariana Grande Stuck with U (with … Stuck wit… NA 2020-05-08 ``` ] ] `right_join()` includes all rows from `y` --- .panelset[ .panel[ .panel-name[x] ```r songs ``` ``` ## # A tibble: 5 × 4 ## name song_name album_name song_popularity ## <chr> <chr> <chr> <dbl> ## 1 Beyoncé Savage Remix (feat. Beyoncé) Savage Rem… 83 ## 2 Taylor Swift cardigan folklore 85 ## 3 Drake Laugh Now Cry Later (feat. Lil Durk) Laugh Now … 95 ## 4 Beyoncé Halo I AM…SASHA… NA ## 5 Ariana Grande Stuck with U (with Justin Bieber) Stuck with… NA ``` ] .panel[ .panel-name[y] ```r albums ``` ``` ## # A tibble: 4 × 2 ## album_name album_release_date ## <chr> <date> ## 1 Savage Remix (feat. Beyoncé) 2020-04-29 ## 2 I AM…SASHA FIERCE 2008-11-14 ## 3 Stuck with U 2020-05-08 ## 4 Laugh Now Cry Later (feat. Lil Durk) 2020-08-14 ``` ] .panel[ .panel-name[inner_join()] ```r inner_join(songs, albums, by = "album_name") ``` ``` ## # A tibble: 4 × 5 ## name song_name album_name song_popularity album_release_d… ## <chr> <chr> <chr> <dbl> <date> ## 1 Beyoncé Savage Remix (feat.… Savage Re… 83 2020-04-29 ## 2 Drake Laugh Now Cry Later… Laugh Now… 95 2020-08-14 ## 3 Beyoncé Halo I AM…SASH… NA 2008-11-14 ## 4 Ariana Grande Stuck with U (with … Stuck wit… NA 2020-05-08 ``` ] ] `inner_join()` includes all rows that are in x **and** y --- .panelset[ .panel[ .panel-name[x] ```r songs ``` ``` ## # A tibble: 5 × 4 ## name song_name album_name song_popularity ## <chr> <chr> <chr> <dbl> ## 1 Beyoncé Savage Remix (feat. Beyoncé) Savage Rem… 83 ## 2 Taylor Swift cardigan folklore 85 ## 3 Drake Laugh Now Cry Later (feat. Lil Durk) Laugh Now … 95 ## 4 Beyoncé Halo I AM…SASHA… NA ## 5 Ariana Grande Stuck with U (with Justin Bieber) Stuck with… NA ``` ] .panel[ .panel-name[y] ```r albums ``` ``` ## # A tibble: 4 × 2 ## album_name album_release_date ## <chr> <date> ## 1 Savage Remix (feat. Beyoncé) 2020-04-29 ## 2 I AM…SASHA FIERCE 2008-11-14 ## 3 Stuck with U 2020-05-08 ## 4 Laugh Now Cry Later (feat. Lil Durk) 2020-08-14 ``` ] .panel[ .panel-name[full_join()] ```r full_join(songs, albums, by = "album_name") ``` ``` ## # A tibble: 5 × 5 ## name song_name album_name song_popularity album_release_d… ## <chr> <chr> <chr> <dbl> <date> ## 1 Beyoncé Savage Remix (feat.… Savage Re… 83 2020-04-29 ## 2 Taylor Swift cardigan folklore 85 NA ## 3 Drake Laugh Now Cry Later… Laugh Now… 95 2020-08-14 ## 4 Beyoncé Halo I AM…SASH… NA 2008-11-14 ## 5 Ariana Grande Stuck with U (with … Stuck wit… NA 2020-05-08 ``` ] ] `full_join()` includes all rows that are in x **or** y --- .panelset[ .panel[ .panel-name[artists] ```r artists ``` ``` ## # A tibble: 4 × 2 ## name followers ## <chr> <dbl> ## 1 Beyoncé 24757958 ## 2 Taylor Swift 33098116 ## 3 Ariana Grande 51807131 ## 4 Drake 50252529 ``` ] .panel[ .panel-name[songs] ```r songs ``` ``` ## # A tibble: 5 × 4 ## name song_name album_name song_popularity ## <chr> <chr> <chr> <dbl> ## 1 Beyoncé Savage Remix (feat. Beyoncé) Savage Rem… 83 ## 2 Taylor Swift cardigan folklore 85 ## 3 Drake Laugh Now Cry Later (feat. Lil Durk) Laugh Now … 95 ## 4 Beyoncé Halo I AM…SASHA… NA ## 5 Ariana Grande Stuck with U (with Justin Bieber) Stuck with… NA ``` ] .panel[ .panel-name[albums] ```r albums ``` ``` ## # A tibble: 4 × 2 ## album_name album_release_date ## <chr> <date> ## 1 Savage Remix (feat. Beyoncé) 2020-04-29 ## 2 I AM…SASHA FIERCE 2008-11-14 ## 3 Stuck with U 2020-05-08 ## 4 Laugh Now Cry Later (feat. Lil Durk) 2020-08-14 ``` ] .panel[ .panel-name[combined] ```r full_join(artists, songs, by = "name") %>% full_join(albums, by = "album_name") ``` ``` ## # A tibble: 5 × 6 ## name followers song_name album_name song_popularity album_release_d… ## <chr> <dbl> <chr> <chr> <dbl> <date> ## 1 Beyoncé 24757958 Savage Re… Savage Re… 83 2020-04-29 ## 2 Beyoncé 24757958 Halo I AM…SASH… NA 2008-11-14 ## 3 Taylor Swift 33098116 cardigan folklore 85 NA ## 4 Ariana Grande 51807131 Stuck wit… Stuck wit… NA 2020-05-08 ## 5 Drake 50252529 Laugh Now… Laugh Now… 95 2020-08-14 ``` ] ]