7 Facebook

Since Facebook is very different from Twitter, it has no open API with which one could collect posts from public profiles and pages. Therefore, we needed access to Facebook's subsidiary CrowdTangle, which is the go-to source for researchers who want access to posts from at least public pages and public verified profiles. We went through the process of signing an agreement with CrowdTangle to get access to their own API, which can be called through curl commands to search for posts from public pages and groups as well as verified public sites on Facebook only. Consequently, you can only use the functions provided here if you have access to CrowdTangle. If you are a research group and already have a signed agreement with Facebook, you can apply for access via: https://www.crowdtangle.com/request

We built our own functions to call their search API, as well as a function to retrieve all the posts posted by the sites and pages added to lists in the Dashboard of our CrowdTangle account. To retrieve the posts from all accounts in a dashboard list within a given time interval, we built the following function:

# CrowdTangle API Function for POSTS endpoint with Weights only set for Comments and Likes from list of Lists

# function needs:
# - token (character string)
# - start date (character string with the format "yyyy-mm-dd")
# - end date (character string with the format "yyyy-mm-dd")
# - lists the posts shall be from (character vector of list ids)

ct.posts <- function(token = NA, start_date = NA, end_date = NA, list_ids = NA, count = 100){
  # Retrieve the posts of all accounts contained in one or more CrowdTangle
  # dashboard lists via the /posts endpoint (weights set to 1 for comments and
  # likes only), paging through the results with `offset`.
  #
  # Arguments:
  # - token:      API token of the dashboard (single character string, 40 chars)
  # - start_date: single character string, e.g. "2020-01-01"
  # - end_date:   single character string, e.g. "2020-01-02"
  # - list_ids:   character vector of list ids; several ids are sent as one
  #               comma-separated `listIds` value
  # - count:      posts requested per page (1 to 100)
  #
  # Returns: a data frame with one row per post; nested JSON objects are
  # flattened into columns by jsonlite::flatten(). If nothing could be
  # retrieved an empty tibble is returned.
  #
  # Notes:
  # - At most 100 pages are requested, i.e. at most 100 * count posts.
  # - If the API answers with an HTTP error, a warning is raised and the posts
  #   collected so far are returned.
  # - Package functions are called by namespace (httr::, jsonlite::, dplyr::)
  #   so the result does not depend on which packages are attached/masked.

  # - validate arguments (`||` keeps every condition a single TRUE/FALSE)
  if (!is.character(token) || length(token) != 1L || is.na(token) || nchar(token) != 40) {
    stop("Please add the API Token of your projects dashboard!\nct.posts(token = 'token')\n")
  }
  if (!is.character(start_date) || length(start_date) != 1L || is.na(start_date)) {
    stop("Please add start_date as a character string like '2020-01-01'!\n")
  }
  if (!is.character(end_date) || length(end_date) != 1L || is.na(end_date)) {
    stop("Please add end_date as a character string like '2020-01-02'!\n")
  }
  if (!is.character(list_ids) || length(list_ids) == 0L || anyNA(list_ids)) {
    stop("Please add at least one ID of a list in the dashboard\nor multiple in a vector!\n")
  }
  if (!is.numeric(count) || length(count) != 1L || is.na(count) || count > 100 || count < 1) {
    stop("The number of returned posts per paginated search can only be a number from 1 to 100!\n")
  }

  # - required packages (checked, but not attached)
  for (pkg in c("httr", "jsonlite", "dplyr")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("Package '", pkg, "' is required but not installed.", call. = FALSE)
    }
  }

  # - query parts that stay the same for every page; httr URL-encodes them
  base_query <- list(
    token = token,
    listIds = paste(list_ids, collapse = ","),
    startDate = start_date,
    endDate = end_date,
    sortBy = "date",
    count = format(count, scientific = FALSE),
    weightComment = "1",
    weightLike = "1"
  )

  max_pages <- 100L
  # - minimum number of seconds between the start of two consecutive calls
  #   (rate limiting; value kept from the original implementation)
  min_gap_secs <- 21

  pages <- vector("list", max_pages)
  offset_val <- 0

  for (i in seq_len(max_pages)) {
    # - start time of this call
    started <- Sys.time()

    resp <- httr::GET(
      "https://api.crowdtangle.com/posts",
      query = c(base_query, list(offset = format(offset_val, scientific = FALSE)))
    )
    if (httr::http_error(resp)) {
      warning(
        "CrowdTangle request failed with HTTP status ", httr::status_code(resp),
        " at offset ", offset_val, "; returning the posts retrieved so far.",
        call. = FALSE
      )
      break
    }

    # - decode the body as UTF-8 text and parse the JSON
    body_txt <- httr::content(resp, as = "text", encoding = "UTF-8")
    posts <- jsonlite::fromJSON(body_txt)[["result"]][["posts"]]

    # - no (more) posts in this time frame
    if (!is.data.frame(posts) || nrow(posts) == 0L) {
      break
    }
    pages[[i]] <- jsonlite::flatten(dplyr::tibble(posts))

    # - a page shorter than the requested size is the last one
    if (nrow(posts) < count) {
      break
    }

    # - define next offset value
    offset_val <- offset_val + count

    # - wait until the minimum gap has passed; units are fixed to seconds
    #   because difftime would otherwise switch to minutes for long requests
    elapsed <- as.numeric(difftime(Sys.time(), started, units = "secs"))
    if (elapsed < min_gap_secs) {
      Sys.sleep(min_gap_secs - elapsed)
    }
  }

  # - bind all pages (unused NULL slots are ignored by bind_rows)
  dplyr::bind_rows(pages)
}

Another function we built searches for posts containing keywords. This function is a bit more complex, as it needs more variables to be set correctly, but it should cover most of the things one wants to track.

# CrowdTangle API Function for POSTS/SEARCH endpoint:

# function needs:
# - token (character string)
# - start date (character string with the format "yyyy-mm-dd")
# - end date (character string with the format "yyyy-mm-dd")
# - search term (Returns only posts that match this search term. Terms AND automatically. Separate with commas for OR, use quotes for phrases. E.g. CrowdTangle API -> AND. CrowdTangle, API -> OR. "CrowdTangle API" -> AND in that exact order.)

ct.search <- function(token = NA, count_size = 100, platfroms = "facebook", searchTerms = NA,
                      start_date = NA, end_date = NA, sortby = "date", types = NA,
                      list_ids = NA, not_list_ids = NA, acc_ids = NA, not_acc_ids = NA,
                      minInter = NA, minSub = NA, not_title = NA, verified = "false", language = NA
                      ){
  # Search for posts through the CrowdTangle /posts/search endpoint, paging
  # through the results with `offset`.
  #
  # Required arguments:
  # - token:       API token of the dashboard (single character string, 40 chars)
  # - searchTerms: character; several elements are joined with commas
  # - start_date:  single character string, e.g. "2020-01-01"
  # - end_date:    single character string, e.g. "2020-01-02"
  #
  # Optional arguments (left out of the request while NA):
  # - count_size:  posts requested per page (1 to 100)
  # - platfroms:   sent as `platforms` (argument name kept for compatibility)
  # - sortby:      sent as `sortBy`
  # - types, language, not_title: character, sent as `types`, `language`,
  #                `notInTitle`
  # - acc_ids, list_ids, not_acc_ids, not_list_ids: character vectors of ids,
  #                sent as `inAccountIds`, `inListIds`, `notInAccountIds`,
  #                `notInListIds`
  # - minInter, minSub: non-negative whole numbers, sent as `minInteractions`
  #                and `minSubscriberCount`
  # - verified:    "true"/"false" (or TRUE/FALSE), sent as `verifiedOnly`
  # Character vectors with several elements are joined with commas. Optional
  # values of the wrong type are ignored with a warning.
  #
  # Returns: a data frame with one row per post; nested JSON objects are
  # flattened into columns by jsonlite::flatten(). If nothing could be
  # retrieved an empty tibble is returned.
  #
  # Notes:
  # - At most 100 pages are requested, i.e. at most 100 * count_size posts.
  # - If the API answers with an HTTP error, a warning is raised and the posts
  #   collected so far are returned.
  # - Package functions are called by namespace (httr::, jsonlite::, dplyr::)
  #   so the result does not depend on which packages are attached/masked.

  # - validate required arguments (`||` keeps every condition a single TRUE/FALSE)
  if (!is.character(token) || length(token) != 1L || is.na(token) || nchar(token) != 40) {
    stop("Please add the API Token of your projects dashboard!\nct.search(token = 'token')\n")
  }
  if (!is.character(start_date) || length(start_date) != 1L || is.na(start_date)) {
    stop("Please add start_date as a character string like '2020-01-01'!\n")
  }
  if (!is.character(end_date) || length(end_date) != 1L || is.na(end_date)) {
    stop("Please add end_date as a character string like '2020-01-02'!\n")
  }
  if (!is.character(searchTerms) || length(searchTerms) == 0L || anyNA(searchTerms)) {
    stop("Please add at least one Search Term to the Search Query!\n")
  }
  if (!is.numeric(count_size) || length(count_size) != 1L || is.na(count_size) ||
      count_size > 100 || count_size < 1) {
    stop("The number of returned posts per paginated search can only be a number from 1 to 100!\n")
  }

  # - required packages (checked, but not attached)
  for (pkg in c("httr", "jsonlite", "dplyr")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("Package '", pkg, "' is required but not installed.", call. = FALSE)
    }
  }

  # - helper: optional character argument -> comma-separated string,
  #   or NULL when the argument was left unset / cannot be used
  as_csv <- function(x, arg) {
    if (is.null(x) || length(x) == 0L || all(is.na(x))) {
      return(NULL)
    }
    if (!is.character(x)) {
      warning("Ignoring '", arg, "': expected a character vector.", call. = FALSE)
      return(NULL)
    }
    paste(x[!is.na(x)], collapse = ",")
  }

  # - helper: optional non-negative whole number -> string without scientific
  #   notation, or NULL when the argument was left unset / cannot be used
  as_count <- function(x, arg) {
    if (is.null(x) || (length(x) == 1L && is.na(x))) {
      return(NULL)
    }
    if (!is.numeric(x) || length(x) != 1L || x < 0 || x != round(x)) {
      warning("Ignoring '", arg, "': expected a single non-negative whole number.", call. = FALSE)
      return(NULL)
    }
    format(x, scientific = FALSE)
  }

  # - allow verified = TRUE / FALSE in addition to "true" / "false"
  if (is.logical(verified) && length(verified) == 1L && !is.na(verified)) {
    verified <- tolower(as.character(verified))
  }

  # - must have:
  query <- list(
    token = token,
    count = format(count_size, scientific = FALSE),
    startDate = start_date,
    endDate = end_date,
    sortBy = sortby,
    searchTerm = paste(searchTerms, collapse = ",")
  )
  # - optional filters; each value is taken from its own argument
  optional <- list(
    inAccountIds = as_csv(acc_ids, "acc_ids"),
    inListIds = as_csv(list_ids, "list_ids"),
    minInteractions = as_count(minInter, "minInter"),
    minSubscriberCount = as_count(minSub, "minSub"),
    notInAccountIds = as_csv(not_acc_ids, "not_acc_ids"),
    notInListIds = as_csv(not_list_ids, "not_list_ids"),
    notInTitle = as_csv(not_title, "not_title"),
    platforms = as_csv(platfroms, "platfroms"),
    types = as_csv(types, "types"),
    verifiedOnly = as_csv(verified, "verified"),
    language = as_csv(language, "language")
  )
  query <- c(query, Filter(Negate(is.null), optional))

  max_pages <- 100L
  # - minimum number of seconds between the start of two consecutive calls
  #   (rate limiting; value kept from the original implementation)
  min_gap_secs <- 21

  pages <- vector("list", max_pages)
  offset_val <- 0

  for (i in seq_len(max_pages)) {
    # - start time of this call
    started <- Sys.time()

    # - API call: the offset is added per page so that every request asks for
    #   the next slice; httr URL-encodes all values (quotes, spaces, commas, ...)
    resp <- httr::GET(
      "https://api.crowdtangle.com/posts/search",
      query = c(query, list(offset = format(offset_val, scientific = FALSE)))
    )
    if (httr::http_error(resp)) {
      warning(
        "CrowdTangle request failed with HTTP status ", httr::status_code(resp),
        " at offset ", offset_val, "; returning the posts retrieved so far.",
        call. = FALSE
      )
      break
    }

    # - decode the body as UTF-8 text and parse the JSON
    body_txt <- httr::content(resp, as = "text", encoding = "UTF-8")
    posts <- jsonlite::fromJSON(body_txt)[["result"]][["posts"]]

    # - no (more) posts match the query
    if (!is.data.frame(posts) || nrow(posts) == 0L) {
      break
    }
    pages[[i]] <- jsonlite::flatten(dplyr::tibble(posts))

    # - a page shorter than the requested size is the last one
    if (nrow(posts) < count_size) {
      break
    }

    # - define next offset value
    offset_val <- offset_val + count_size

    # - wait until the minimum gap has passed; units are fixed to seconds
    #   because difftime would otherwise switch to minutes for long requests
    elapsed <- as.numeric(difftime(Sys.time(), started, units = "secs"))
    if (elapsed < min_gap_secs) {
      Sys.sleep(min_gap_secs - elapsed)
    }
  }

  # - bind all pages (unused NULL slots are ignored by bind_rows)
  dplyr::bind_rows(pages)
}

Obviously we use these functions in bigger scripts to get the desired posts on a daily basis with automated scripts.