7 Facebook
Facebook is very different from Twitter: it has no open API with which one could collect posts from public profiles and pages. Therefore, we needed access to Facebook's subsidiary CrowdTangle, which is the go-to source for researchers who want access to posts from at least public pages and verified public profiles. We went through the process of signing an agreement with CrowdTangle to get access to their own API, which can be called through curl commands to search for posts from public pages and groups as well as verified public profiles on Facebook only. Consequently, you can only use the functions provided here if you have access to CrowdTangle. If you are a research group, you can apply for access via the following link, provided you already have a signed agreement with Facebook: https://www.crowdtangle.com/request
We built our own functions to call their search API, as well as a function to retrieve all the posts published by the sites and pages that were added to lists in the dashboard of our CrowdTangle account. To retrieve the posts from all accounts in a dashboard list within a given time interval, we built the following function:
# CrowdTangle API Function for POSTS endpoint with Weights only set for Comments and Likes from list of Lists
#
# Pages through https://api.crowdtangle.com/posts (sorted by date) and row-binds
# the flattened posts of every page into one data frame.
#
# function needs:
# - token      (character string, 40 characters: API token of the dashboard)
# - start_date (character string with the format "yyyy-mm-dd")
# - end_date   (character string with the format "yyyy-mm-dd")
# - list_ids   (character vector of list ids; several ids are sent as one
#               comma-separated "listIds" value)
# - count      (posts requested per page, a number from 1 to 100)
#
# returns:
# - a data frame with one row per retrieved post (zero rows if nothing matched)
#
# notes:
# - stops with an error on invalid arguments, missing packages or a failed
#   HTTP request
# - at most 100 pages are requested; a warning is raised if the last of them
#   was still full, because more posts may then exist
# - httr, jsonlite and dplyr are called through their namespaces, so the result
#   does not depend on which packages happen to be attached
ct.posts <- function(token = NA, start_date = NA, end_date = NA, list_ids = NA, count = 100){
  # - argument checks (scalar tests with ||, vector arguments tested as a whole)
  if(length(token) != 1 || is.na(token) || nchar(token) != 40){
    stop("Please add the API Token of your projects dashboard!\nct.posts(token = 'token')\n")
  }
  if(length(start_date) != 1 || is.na(start_date) || !is.character(start_date)){
    stop("Please add start_date as a character string like '2020-01-01'!\n")
  }
  if(length(end_date) != 1 || is.na(end_date) || !is.character(end_date)){
    stop("Please add end_date as acharacter string like '2020-01-02'!\n")
  }
  if(!is.character(list_ids) || length(list_ids) < 1 || anyNA(list_ids)){
    stop("Please add at least one ID of a list in the dashboard\nor multiple in a vector!\n")
  }
  if(!is.numeric(count) || length(count) != 1 || is.na(count) || count > 100 || count < 1){
    stop("The number of returned posts per paginated search can only be a number from 1 to 100!\n")
  }

  # - required packages (checked after the arguments, used via pkg::fun)
  for(pkg in c("httr", "jsonlite", "dplyr")){
    if(!requireNamespace(pkg, quietly = TRUE)){
      stop("Package '", pkg, "' is required but not installed!\n")
    }
  }

  # - paging set-up
  page_size <- as.integer(count)
  max_pages <- 100L
  # - every iteration is stretched to at least this many seconds to stay below
  #   the API rate limit (value taken over from the original implementation)
  min_interval <- 21
  # - the API expects several list ids as one comma-separated value
  list_param <- paste(list_ids, collapse = ",")

  pages <- vector("list", max_pages)
  offset_val <- 0L
  last_page_full <- FALSE

  for(i in seq_len(max_pages)){
    # - start time of this request
    start <- Sys.time()

    # - search command (httr url-encodes the query values)
    tmp_ret <- httr::GET(
      "https://api.crowdtangle.com/posts",
      query = list(
        token = token,
        listIds = list_param,
        startDate = start_date,
        endDate = end_date,
        sortBy = "date",
        count = page_size,
        offset = offset_val,
        weightComment = 1,
        weightLike = 1
      )
    )
    # - fail loudly instead of parsing an error body as if it were posts
    httr::stop_for_status(tmp_ret)

    # - Transform response body to UTF-8 text, then text to JSON
    res_tmp_raw <- httr::content(tmp_ret, as = "text", encoding = "UTF-8")
    content_tmp_raw <- jsonlite::fromJSON(res_tmp_raw)
    posts <- content_tmp_raw[["result"]][["posts"]]

    # - keep the page only if it really contains posts
    n_posts <- if(is.data.frame(posts)) nrow(posts) else 0L
    if(n_posts > 0){
      pages[[i]] <- jsonlite::flatten(dplyr::tibble(posts))
    }

    # - a page shorter than the requested size is the last one
    last_page_full <- n_posts >= page_size
    if(!last_page_full || i == max_pages){
      break
    }

    # - define next offset value
    offset_val <- offset_val + page_size

    # - passed time in seconds (units fixed so slow requests are not read as minutes)
    passed_time <- as.numeric(difftime(Sys.time(), start, units = "secs"))
    # - time to wait till next call
    if(passed_time < min_interval){
      Sys.sleep(min_interval - passed_time)
    }
  }

  if(last_page_full){
    warning("Page limit of ", max_pages, " requests reached; more posts may exist in this time frame.",
            call. = FALSE)
  }

  # - Bind data (pages never filled are NULL and are ignored)
  dplyr::bind_rows(pages)
}
Another function we built searches for posts containing keywords. This function is a bit more complex, as it needs more variables to be set correctly, but it should cover most of the things one wants to track.
# CrowdTangle API Function for POSTS/SEARCH endpoint:
#
# Pages through https://api.crowdtangle.com/posts/search and row-binds the
# flattened posts of every page into one data frame.
#
# function needs:
# - token       (character string, 40 characters: API token of the dashboard)
# - start_date  (character string with the format "yyyy-mm-dd")
# - end_date    (character string with the format "yyyy-mm-dd")
# - searchTerms (Returns only posts that match this search term. Terms AND automatically. Separate with commas for OR, use quotes for phrases. E.g. CrowdTangle API -> AND. CrowdTangle, API -> OR. "CrowdTangle API" -> AND in that exact order.
#                A character vector of several terms is joined with commas.)
#
# optional filters (left out of the request when NA or of the wrong type):
# - count_size  (posts requested per page, a number from 1 to 100)
# - platfroms   (sent as "platforms"; the misspelled argument name is kept so
#                existing calls keep working)
# - sortby      (sent as "sortBy")
# - types       (sent as "types")
# - list_ids / not_list_ids (character, sent as "inListIds" / "notInListIds")
# - acc_ids / not_acc_ids   (character, sent as "inAccountIds" / "notInAccountIds")
# - minInter / minSub       (non-negative whole numbers, sent as
#                            "minInteractions" / "minSubscriberCount")
# - not_title   (character, sent as "notInTitle")
# - verified    (character, sent as "verifiedOnly")
# - language    (character, sent as "language")
# Character vectors with several values are joined with commas.
#
# returns:
# - a data frame with one row per retrieved post (zero rows if nothing matched)
#
# notes:
# - stops with an error on invalid required arguments, missing packages or a
#   failed HTTP request
# - at most 100 pages are requested; a warning is raised if the last of them
#   was still full, because more posts may then exist
ct.search <- function(token = NA, count_size = 100, platfroms = "facebook", searchTerms = NA,
                      start_date = NA, end_date = NA, sortby = "date", types = NA,
                      list_ids = NA, not_list_ids = NA, acc_ids = NA, not_acc_ids = NA,
                      minInter = NA, minSub = NA, not_title = NA, verified = "false", language = NA
){
  # - argument checks (scalar tests with ||, vector arguments tested as a whole)
  if(length(token) != 1 || is.na(token) || nchar(token) != 40){
    stop("Please add the API Token of your projects dashboard!\nct.search(token = 'token')\n")
  }
  if(length(start_date) != 1 || is.na(start_date) || !is.character(start_date)){
    stop("Please add start_date as a character string like '2020-01-01'!\n")
  }
  if(length(end_date) != 1 || is.na(end_date) || !is.character(end_date)){
    stop("Please add end_date as acharacter string like '2020-01-02'!\n")
  }
  if(!is.character(searchTerms) || length(searchTerms) < 1 || anyNA(searchTerms)){
    stop("Please add at least one Search Term to the Search Query!\n")
  }
  if(!is.numeric(count_size) || length(count_size) != 1 || is.na(count_size) ||
     count_size > 100 || count_size < 1){
    stop("The number of returned posts per paginated search can only be a number from 1 to 100!\n")
  }

  # - required packages (checked after the arguments, used via pkg::fun)
  for(pkg in c("httr", "jsonlite", "dplyr")){
    if(!requireNamespace(pkg, quietly = TRUE)){
      stop("Package '", pkg, "' is required but not installed!\n")
    }
  }

  # - character filter -> one comma-separated value, anything else -> NULL
  as_text_param <- function(x){
    if(is.character(x) && length(x) > 0 && !anyNA(x)){
      paste(x, collapse = ",")
    } else {
      NULL
    }
  }
  # - non-negative whole number (integer or double) -> plain digits, else NULL
  as_count_param <- function(x){
    if(is.numeric(x) && length(x) == 1 && !is.na(x) && x >= 0 && x == round(x)){
      format(x, scientific = FALSE, trim = TRUE)
    } else {
      NULL
    }
  }

  # - paging set-up
  page_size <- as.integer(count_size)
  max_pages <- 100L
  # - every iteration is stretched to at least this many seconds to stay below
  #   the API rate limit (value taken over from the original implementation)
  min_interval <- 21

  # - query parts that stay the same for every page; each argument is mapped to
  #   its own API parameter
  base_query <- list(
    token = token,
    count = page_size,
    startDate = start_date,
    endDate = end_date,
    sortBy = as_text_param(sortby),
    inAccountIds = as_text_param(acc_ids),
    inListIds = as_text_param(list_ids),
    minInteractions = as_count_param(minInter),
    minSubscriberCount = as_count_param(minSub),
    notInAccountIds = as_text_param(not_acc_ids),
    notInListIds = as_text_param(not_list_ids),
    notInTitle = as_text_param(not_title),
    platforms = as_text_param(platfroms),
    searchTerm = as_text_param(searchTerms),
    types = as_text_param(types),
    verifiedOnly = as_text_param(verified),
    language = as_text_param(language)
  )
  # - drop every filter that was not set
  base_query <- Filter(Negate(is.null), base_query)

  pages <- vector("list", max_pages)
  offset_val <- 0L
  last_page_full <- FALSE

  for(i in seq_len(max_pages)){
    # - start time of this request
    start <- Sys.time()

    # - API Call: the offset is renewed for every page, httr url-encodes the
    #   query values (quotes, spaces, ...)
    tmp_ret <- httr::GET(
      "https://api.crowdtangle.com/posts/search",
      query = c(base_query, list(offset = offset_val))
    )
    # - fail loudly instead of parsing an error body as if it were posts
    httr::stop_for_status(tmp_ret)

    # - Transform response body to UTF-8 text, then text to JSON
    res_tmp_raw <- httr::content(tmp_ret, as = "text", encoding = "UTF-8")
    content_tmp_raw <- jsonlite::fromJSON(res_tmp_raw)
    posts <- content_tmp_raw[["result"]][["posts"]]

    # - keep the page only if it really contains posts
    n_posts <- if(is.data.frame(posts)) nrow(posts) else 0L
    if(n_posts > 0){
      pages[[i]] <- jsonlite::flatten(dplyr::tibble(posts))
    }

    # - a page shorter than the requested size is the last one
    last_page_full <- n_posts >= page_size
    if(!last_page_full || i == max_pages){
      break
    }

    # - define next offset value
    offset_val <- offset_val + page_size

    # - passed time in seconds (units fixed so slow requests are not read as minutes)
    passed_time <- as.numeric(difftime(Sys.time(), start, units = "secs"))
    # - time to wait till next call
    if(passed_time < min_interval){
      Sys.sleep(min_interval - passed_time)
    }
  }

  if(last_page_full){
    warning("Page limit of ", max_pages, " requests reached; more posts may match this search.",
            call. = FALSE)
  }

  # - Bind data (pages never filled are NULL and are ignored)
  dplyr::bind_rows(pages)
}
Obviously, we use these functions inside bigger, automated scripts to collect the desired posts on a daily basis.