The goal of this project is to visualize high-dimensional data from the GISS Surface Temperature Analysis (GISTEMP), available at https://data.giss.nasa.gov/gistemp/.
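The data set comprises many variables, but we will focus on the year and three temperature series: Glob, NHem, and SHem (the global, Northern Hemisphere, and Southern Hemisphere columns).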
The group variable allows us to apply custom colors to better observe the data. Our dependent variable or ‘y’ variable is temperature while our independent variable or ‘x’ is the year.
## Import the GISTEMP table from its CSV export
GISTEMPData <- read.csv(file = "ExcelFormattedGISTEMPData2CSV.csv")
## Preview the first rows of the data (head() shows 6 rows by default)
head(x = GISTEMPData, n = 6L)
## Year Glob NHem SHem X24N.90N X24S.24N X90S.24S X64N.90N X44N.64N X24N.44N
## 1 1880 -19 -33 -5 -38 -16 -5 -89 -54 -22
## 2 1881 -10 -18 -2 -27 -2 -5 -54 -40 -14
## 3 1882 -9 -17 -1 -21 -10 4 -125 -20 -3
## 4 1883 -19 -30 -8 -34 -22 -2 -28 -57 -20
## 5 1884 -27 -42 -12 -56 -17 -11 -127 -58 -41
## 6 1885 -31 -41 -21 -61 -17 -20 -119 -70 -43
## EQU.24N X24S.EQU X44S.24S X64S.44S X90S.64S
## 1 -26 -5 -2 -8 39
## 2 -5 2 -6 -3 37
## 3 -12 -8 3 8 42
## 4 -25 -19 -1 0 37
## 5 -21 -14 -15 -5 40
## 6 -11 -23 -27 -7 38
## Preview the final 10 rows of the data
tail(x = GISTEMPData, n = 10L)
## Year Glob NHem SHem X24N.90N X24S.24N X90S.24S X64N.90N X44N.64N X24N.44N
## 126 2005 69 84 55 99 65 45 200 116 56
## 127 2006 64 80 47 96 56 40 173 105 67
## 128 2007 66 83 49 110 47 48 201 129 69
## 129 2008 54 66 42 88 39 40 144 102 61
## 130 2009 65 71 59 74 68 51 127 58 67
## 131 2010 71 88 55 98 69 48 199 86 74
## 132 2011 60 71 50 93 37 58 211 90 57
## 133 2012 63 77 50 97 51 44 189 89 73
## 134 2013 66 76 56 88 58 54 123 103 68
## 135 2014 75 91 58 106 67 54 185 113 76
## EQU.24N X24S.EQU X44S.24S X64S.44S X90S.64S
## 126 61 69 52 20 76
## 127 55 58 54 19 36
## 128 44 50 54 6 117
## 129 33 45 56 10 50
## 130 66 71 61 18 85
## 131 72 66 66 23 38
## 132 38 37 68 26 93
## 133 46 57 60 25 32
## 134 57 58 64 30 67
## 135 70 64 76 23 47
# Load libraries
library(tidyverse)
## ── Attaching packages ──────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.1
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ─────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidyr)
library(ggthemes)
library(viridis)
## Loading required package: viridisLite
# We only want to plot the most recent years, 2004-2014
# (the last 11 rows of the table)
nrow(GISTEMPData)
## [1] 135
# Subset the data we want.
# Rows are selected by Year and columns by name, rather than by hard-coded
# positions, so the subset stays correct if the CSV gains, loses, or reorders
# rows or columns.
# Convert from wide to long format before plotting: one row per Year and group,
# with the column name stored in `group` and the value stored in `temp`.
GIS.long <- GISTEMPData %>%
  filter(Year >= 2004, Year <= 2014) %>%
  select(Year, Glob, NHem, SHem) %>%
  pivot_longer(
    cols = c(Glob, NHem, SHem),
    names_to = "group",
    values_to = "temp"
  ) %>%
  # Keep `group` as a factor whose levels follow the original column order,
  # as gather(factor_key = TRUE) did.
  mutate(group = factor(group, levels = c("Glob", "NHem", "SHem"))) %>%
  # pivot_longer() interleaves the groups row by row; restore the stacked
  # ordering (all Glob rows, then NHem, then SHem) that gather() produced.
  arrange(group, Year) %>%
  # Return a plain data.frame, matching what gather() returned here.
  as.data.frame()
# Plot: one line per group, with Year on the x-axis and temp on the y-axis.
# The plot is kept in a named object so that ggsave() writes exactly this plot
# instead of depending on whatever last_plot() happens to hold.
GIS.plot <- ggplot(
  GIS.long,
  aes(x = Year, y = temp, group = group, color = group)
) +
  geom_line() +
  ggtitle("Temperature from 2004-2014") +
  ylab("Temperature") +
  theme_clean() +
  scale_color_viridis(discrete = TRUE)
# Display the plot (an assignment alone does not draw it).
print(GIS.plot)
# Save at an explicit 7 x 5 in size (the size previously inherited from the
# graphics device) so the image does not change with the active device.
ggsave(
  filename = "GIS_temp_viz.png",
  plot = GIS.plot,
  width = 7,
  height = 5,
  units = "in"
)
Over time, the cumulative surface temperature increases for each group, and the lines for the groups never intersect one another.