#load required library
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Load the eBird observation data set from the course GitHub repository.
# The CSV's first column has no header, so read_csv() names it `...1`
# (see the "New names" message printed below).
Matt_ebird <- read_csv("https://github.com/mbtoomey/Biol_7263/blob/main/Data/MBT_ebird.csv?raw=true")
## New names:
## Rows: 6595 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): list_ID, common_name, scientific_name, location dbl (8): ...1, count,
## duration, latitude, longitude, count_tot, month, year date (1): date time (1):
## time
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Open the full data set in the data viewer for a quick visual check
view(Matt_ebird)
In which year did I observe the most individual birds? How many? 2014: 9,303 birds
# Total number of individual birds counted in each year.
year_count <- Matt_ebird %>%
  group_by(year) %>% # one group per calendar year
  summarize(total_birds = sum(count)) # add up the counts within each year
view(year_count)
# Sort so the year with the most birds comes first. arrange() takes its
# sorting expressions directly and has no `by` argument, so desc() is
# passed unnamed.
arrange(year_count, desc(total_birds))
## # A tibble: 13 × 2
## year total_birds
## <dbl> <dbl>
## 1 2014 9303
## 2 2020 8941
## 3 2017 6102
## 4 2021 3713
## 5 2015 3219
## 6 2018 1738
## 7 2022 1371
## 8 2004 1116
## 9 2019 677
## 10 2016 379
## 11 2013 151
## 12 2003 49
## 13 2009 25
In that year, how many different species of birds did I observe? 210
# Species recorded in 2014: one row per species, where `n` is the number of
# observation records (rows) for that species, not the number of birds seen.
species_2014 <- count(
  filter(Matt_ebird, year == 2014), # keep only the 2014 records
  scientific_name # tally the records for each species
)
view(species_2014)
# There is one row per species, so the row count is the number of
# different species observed that year.
nrow(species_2014)
## [1] 210
In which state did I most frequently observe Red-winged Blackbirds? Missouri
# Number of Red-winged Blackbird records at each location, most records
# first. `n` counts observation records (rows), not individual birds.
RWBB <- Matt_ebird %>%
  filter(common_name == "Red-winged Blackbird") %>% # this species only
  count(location) %>% # records per location
  arrange(desc(n)) # highest number of records at the top
head(RWBB)
## # A tibble: 5 × 2
## location n
## <chr> <int>
## 1 US-MO 58
## 2 US-OK 26
## 3 US-VT 7
## 4 US-FL 2
## 5 US-IL 1
Filter observations for a duration between 5 and 200 minutes. Calculate the mean rate per checklist that I encounter species each year. Specifically, calculate the number of species in each checklist divided by duration and then take the mean for the year.
# Per-checklist summary for checklists lasting 5 to 200 minutes (inclusive).
duration_filter <- Matt_ebird %>%
  filter(between(duration, 5, 200)) %>% # keep durations within [5, 200]
  group_by(list_ID) %>% # one group per checklist
  summarise(
    duration = first(duration), # taken from the checklist's first row
    year = first(year), # taken from the checklist's first row
    count = n_distinct(common_name) # number of different species on the list
  )
# Species per minute for every checklist, then averaged within each year.
rate_birds <- duration_filter %>%
  mutate(rate = count / duration) %>% # encounter rate of a single checklist
  group_by(year) %>%
  summarise(mean_rate = mean(rate)) # mean checklist rate for the year
head(rate_birds)
## # A tibble: 6 × 2
## year mean_rate
## <dbl> <dbl>
## 1 2003 0.0306
## 2 2004 0.0660
## 3 2009 0.133
## 4 2013 0.187
## 5 2014 0.304
## 6 2015 0.289
Create a tibble that includes the complete observations for the top 10 most frequently observed species. First generate a top 10 list and then use this list to filter all observations. Export this tibble as a .csv file saved to a folder called “Results” within your R project and add a link to the markdown document.
# Step 1: build the top 10 list. "Frequency" here is the total number of
# individuals counted for a species (sum of `count`).
# NOTE(review): if "most frequently observed" should instead mean the number
# of observation records, replace sum(count) with n().
bird_freq <- Matt_ebird %>%
  group_by(scientific_name) %>% # one group per species
  summarise(frequency = sum(count)) %>% # one row per species with its total
  arrange(desc(frequency)) %>% # most frequent species first
  head(10) # keep only the ten most frequent species

# Step 2: use the list to filter all observations. The join keeps every
# original row belonging to a top 10 species (not just one row per species)
# and attaches that species' `frequency`.
Top_10_data <- Matt_ebird %>%
  inner_join(bird_freq, by = "scientific_name") %>%
  arrange(desc(frequency)) # most frequent species' observations first

# write_csv() does not create missing folders, so make sure "Results" exists.
dir.create("Results", showWarnings = FALSE)
write_csv(Top_10_data, "Results/Top_10_data.csv")