speech
The `speech` package, by Nicolás Schmidt, Diego Luján and Juan Andrés Moraes
Converts the floor speeches of Uruguayan legislators, extracted from the parliamentary minutes, into a tidy data.frame in which each observation is the intervention of a single legislator.
# Install the released version of speech from CRAN
install.packages("speech")

# Or install the development version from GitHub.
# requireNamespace() only checks that 'remotes' is installed; unlike require()
# it does not attach the package, which is all that is needed here because
# install_github() is called through `remotes::`.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
remotes::install_github("Nicolas-Schmidt/speech")
You can see more complex examples in the following link.
# Load the package
library(speech)
# Address of the session minutes to process
url <- "https://parlamento.gub.uy/documentosyleyes/documentos/diarios-de-sesion/6084/IMG"
# Build the table of speeches from the document at `url`
text <- speech::speech_build(file = url)
# Print the result
text
#> # A tibble: 24 × 7
#> legislator speech chamber date legislature id sex
#> <chr> <chr> <chr> <date> <int> <chr> <dbl>
#> 1 BORDABERRY SEÑOR BORDABERRY. Pido… COMISI… 2019-09-17 48 2019… 1
#> 2 BORDABERRY SEÑOR BORDABERRY. Prop… COMISI… 2019-09-17 48 2019… 1
#> 3 AVIAGA SEÑORA AVIAGA. Pido la… COMISI… 2019-09-17 48 2019… 0
#> 4 AVIAGA SEÑORA AVIAGA. En el m… COMISI… 2019-09-17 48 2019… 0
#> 5 GOÑI SEÑOR GOÑI. Pido la pa… COMISI… 2019-09-17 48 2019… 1
#> 6 GOÑI SEÑOR GOÑI. El Frente … COMISI… 2019-09-17 48 2019… 1
#> 7 MAHIA SEÑOR MAHIA. Pido la p… COMISI… 2019-09-17 48 2019… 1
#> 8 MAHIA SEÑOR MAHIA. Gracias, … COMISI… 2019-09-17 48 2019… 1
#> 9 ABDALA SEÑOR ABDALA. Voto por… COMISI… 2019-09-17 48 2019… 1
#> 10 ASTI SEÑOR ASTI. Obviamente… COMISI… 2019-09-17 48 2019… 1
#> # ℹ 14 more rows
# List the legislators in `text` whose names start with "A" or "M"
speech_check(text, initial = c("A", "M"))
#> $A
#> legislator
#> 1 ABDALA
#> 2 ASTI
#> 3 AVIAGA
#>
#> $M
#> legislator
#> 1 MAHIA
#> 2 MERONI
# Rebuild the table with compiler = TRUE; the printed result has one row per
# legislator (11 rows instead of 24)
text <- speech::speech_build(file = url, compiler = TRUE)
text
#> # A tibble: 11 × 7
#> legislator legislature chamber date id speech sex
#> <chr> <int> <chr> <date> <chr> <chr> <dbl>
#> 1 ABDALA 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 1
#> 2 ASTI 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 1
#> 3 AVIAGA 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 0
#> 4 BORDABERRY 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 1
#> 5 GOÑI 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 1
#> 6 LAZO 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 0
#> 7 MAHIA 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 1
#> 8 MERONI 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 1
#> 9 PEREYRA 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 0
#> 10 TOURNE 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 0
#> 11 VIERA 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 1
# Add a `word` column computed from each speech by speech_word_count()
text$word <- speech_word_count(text$speech)
# Compact, column-by-column view of the table
dplyr::glimpse(text)
#> Rows: 11
#> Columns: 8
#> $ legislator <chr> "ABDALA", "ASTI", "AVIAGA", "BORDABERRY", "GOÑI", "LAZO", …
#> $ legislature <int> 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48
#> $ chamber <chr> "COMISION PERMANENTE", "COMISION PERMANENTE", "COMISION PE…
#> $ date <date> 2019-09-17, 2019-09-17, 2019-09-17, 2019-09-17, 2019-09-17…
#> $ id <chr> "20190917c0001", "20190917c0001", "20190917c0001", "20190…
#> $ speech <chr> "SEÑOR ABDALA. Voto por la señora legisladora Daisy Tourné…
#> $ sex <dbl> 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1
#> $ word <int> 398, 46, 105, 951, 98, 103, 126, 12, 12, 111, 8
library(magrittr)
# Return the words of `string` that are longer than `min` characters.
#
# Punctuation is stripped first; every element is then split on single
# spaces and the pieces are flattened into one character vector.
#
# string: character vector of texts.
# min:    length threshold; only words with MORE than `min` characters
#         are kept.
minchar <- function(string, min = 3) {
  no_punct <- stringr::str_remove_all(string, "[[:punct:]]")
  words <- unlist(strsplit(no_punct, " "))
  is_long <- nchar(words) > min
  words[is_long]
}
# Word cloud of the words longer than four characters.
# dfm() is given a tokens object and the honorifics are dropped with
# dfm_remove(): calling dfm() directly on a corpus, and its `remove` argument,
# are deprecated since quanteda 3.0 and not supported by quanteda 4.
text$speech %>%
  minchar(min = 4) %>%
  quanteda::corpus() %>%
  quanteda::tokens() %>%
  quanteda::dfm() %>%
  quanteda::dfm_remove(pattern = c("señor", "señora")) %>%
  quanteda.textplots::textplot_wordcloud(
    color = rev(RColorBrewer::brewer.pal(10, "RdBu"))
  )
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.2.3
# Bar chart of the 40 most frequent words longer than four characters.
text$speech %>%
  minchar(min = 4) %>%
  tibble::enframe() %>%
  tidytext::unnest_tokens(word, value) %>%
  dplyr::count(word, sort = TRUE) %>%
  # Drop the honorifics while `word` is still a character column
  dplyr::filter(!stringr::str_detect(word, "^señor")) %>%
  # slice_head() never pads with NA rows when fewer than 40 words remain,
  # which `.[1:40, ]` would do
  dplyr::slice_head(n = 40) %>%
  # Order the bars by frequency
  dplyr::mutate(word = stats::reorder(word, n)) %>%
  ggplot(aes(word, n)) +
  geom_col(col = "black", fill = "#00A08A", width = .7) +
  labs(x = "", y = "") +
  coord_flip() +
  theme_minimal()
# Look up the documents for chamber "D" on the two given dates
# (speech_url() presumably returns their URLs -- confirm in the package docs)
urls <- speech_url(chamber = "D", days = c("2002-06-12", "2004-04-14"))
# Extract the roll-call votes from those documents
rollcall <- speech_rollcall(file = urls)
rollcall
#> # A tibble: 165 × 10
#> legislator vote argument speech chamber date legislature rollcall
#> <chr> <dbl> <dbl> <chr> <chr> <date> <int> <int>
#> 1 ABDALA 0 0 SEÑOR… CAMARA… 2002-06-12 45 1
#> 2 AGAZZI 1 1 SEÑOR… CAMARA… 2002-06-12 45 1
#> 3 AMEN VAGHETTI 0 0 SEÑOR… CAMARA… 2002-06-12 45 1
#> 4 AMORIN BATLLE 0 0 SEÑOR… CAMARA… 2002-06-12 45 1
#> 5 ARAUJO 0 1 SEÑOR… CAMARA… 2002-06-12 45 1
#> 6 ARGENZIO 0 1 SEÑOR… CAMARA… 2002-06-12 45 1
#> 7 ARGIMON 0 0 SEÑOR… CAMARA… 2002-06-12 45 1
#> 8 ARRARTE FERNAN… 1 0 SEÑOR… CAMARA… 2002-06-12 45 1
#> 9 ARREGUI 1 1 SEÑOR… CAMARA… 2002-06-12 45 1
#> 10 BARAIBAR 1 0 SEÑOR… CAMARA… 2002-06-12 45 1
#> # ℹ 155 more rows
#> # ℹ 2 more variables: id <chr>, sex <dbl>
# Summarise the roll calls; the printed result has one row per session date
summary(rollcall)
#> # A tibble: 2 × 10
#> Chamber Date Legislators Affirmative Negative prop_AF prop_NG prop_women
#> <chr> <date> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 CRR 2002-06-12 92 45 47 48.9 51.1 13.0
#> 2 CRR 2004-04-14 73 32 41 43.8 56.2 15.1
#> # ℹ 2 more variables: prop_arg <dbl>, rc <int>
To cite the package `speech` in publications, please use:
citation(package = 'speech')
#>
#> To cite speech in publications use:
#>
#>
#> A BibTeX entry for LaTeX users is
#>
#> @Article{,
#> title = {Estimating Parties’ Policy Positions in Uruguay: Comparing Scaling Methods Based on Legislative Speeches and Roll-Call Votes},
#> author = {Diego Luján and Nicolás Schmidt and Juan A. Moraes},
#> journal = {Latin American Politics and Society},
#> year = {2023},
#> volume = {0},
#> number = {0},
#> pages = {1-17},
#> url = {doi:10.1017/lap.2023.12},
#> }
Nicolas Schmidt (nschmidt@cienciassociales.edu.uy)