#######################################################
# Federico Maria Ferrara, University of Geneva
# The Battle of Ideas on the Euro Crisis: Evidence from ECB Inter-Meeting Speeches
#########################################################

##### Load Packages
library(austin)
library(ggplot2)
library(quanteda)
library(readtext)
library(stringr)
library(gsubfn)
library(scales)
library(ggthemes)
library(RTextTools)
library(e1071)
library(plyr)
library(rpart)
library(preText)
library(zoo)

set.seed(42)

# Load Replication Data containing Sample of Speeches for Classification and
# All Speeches
load("~/Replication Material.RData")

### Corpus Creation and Text Pre-Processing for Sample of Speeches
sample <- corpus(data_sample)

# Documents 1-57 are labelled "fiscal" and documents 58-999 "no_fiscal".
tt <- rep(c("fiscal", "no_fiscal"), times = c(57, 942))
docvars(sample, "Fiscal") <- tt

# Re-draw all 999 documents in random order (reproducible via set.seed above),
# so that the positional train/test split below is a random split.
sample <- corpus_sample(sample, size = 999)
docvars(sample, "id") <- as.numeric(seq_len(ndoc(sample)))

FiscalDfm <- dfm(sample,
                 tolower = TRUE,
                 remove = c(stopwords("english"), "will"),
                 remove_punct = TRUE,
                 remove_numbers = TRUE,
                 remove_separators = TRUE,
                 stem = TRUE)
FiscalDfm <- dfm_trim(FiscalDfm, min_count = 3)
FiscalDfm <- tfidf(FiscalDfm)

### SVM (e1071) and CART (rpart); the SVM is fitted only for comparison
fiscal_lab <- factor(docvars(sample, "Fiscal"))
fiscal_lab <- as.data.frame(fiscal_lab)

# Train on the first 666 documents and evaluate on the remaining ones.
# The original script used 1:666 and 666:999, which put document 666 in both
# sets; the test set now starts at 667 so no training document is evaluated.
n_train <- 666
stopifnot(ndoc(FiscalDfm) > n_train)
train_idx <- seq_len(n_train)
test_idx <- (n_train + 1):ndoc(FiscalDfm)

train_data <- as.data.frame(FiscalDfm[train_idx, ])
test_data <- as.data.frame(FiscalDfm[test_idx, ])
fiscal_lab1 <- fiscal_lab[train_idx, ]
fiscal_lab2 <- fiscal_lab[test_idx, ]

# Drop the first column of the converted dfm so that only feature columns
# enter the models.
train_data <- train_data[, -1]
test_data <- test_data[, -1]

svm.model <- svm(fiscal_lab1 ~ ., data = train_data, cost = 10^5, gamma = 10^-5)
svm.pred <- predict(svm.model, test_data)
table(unname(svm.pred), fiscal_lab2)

rpart.model <- rpart(fiscal_lab1 ~ ., data = train_data)
rpart.pred <- predict(rpart.model, test_data, type = "class")
table(unname(rpart.pred), fiscal_lab2)

# caret is attached only here, after the models are fitted, exactly as in the
# original script (attaching it earlier would change function masking order).
library(caret)
confus.svm <- confusionMatrix(unname(svm.pred), fiscal_lab2)
confus.rpart <- confusionMatrix(unname(rpart.pred), fiscal_lab2)
# Accuracy and precision (positive predictive value) of both classifiers on
# the held-out test documents.
accuracy.svm <- confus.svm$overall["Accuracy"]
precision.svm <- posPredValue(svm.pred, fiscal_lab2)
accuracy.rpart <- confus.rpart$overall["Accuracy"]
# The original script misspelled this name; keep the old spelling as an alias
# in case later code refers to it.
accuracy.rpar <- accuracy.rpart
precision.rpart <- posPredValue(rpart.pred, fiscal_lab2)

### Corpus Creation and Text Pre-Processing for All Speeches
mycorpusECB <- corpus(data_ecb_speeches)

# Summarise every document instead of relying on a hard-coded document count.
corpus_overview <- summary(mycorpusECB, n = ndoc(mycorpusECB))
# as.character() works whether the Text column is a factor or a character
# vector (calling as.character.factor() directly fails on non-factors).
tt <- as.character(corpus_overview$Text)
sentences <- corpus_overview$Sentences

# The parsing below assumes document names of the form
# "<prefix>_<speaker>_<ddMonthYYYY>.txt": the date is whatever follows the
# last underscore, the speaker is the text between the first two underscores.
dates <- gsub(".*_", "", tt)
dates <- sub("\\.txt$", "", dates)
speaker <- strapplyc(tt, "_(.*?)_", simplify = c)

# Month names are parsed with %B, which is locale dependent: switch LC_TIME to
# "C" for the conversion and restore the previous setting afterwards.
lct <- Sys.getlocale("LC_TIME")
Sys.setlocale("LC_TIME", "C")
ECBdates <- as.Date(dates, format = "%d%B%Y")
invisible(Sys.setlocale("LC_TIME", lct))

docvars(mycorpusECB, "Date") <- paste(ECBdates)
docvars(mycorpusECB, "Speaker") <- paste(speaker)
docvars(mycorpusECB, "Sentences") <- paste(sentences)

# Transform Corpus into Paragraphs
textcorpus <- corpus_reshape(mycorpusECB, to = "paragraphs")
docvars(textcorpus, "id") <- as.numeric(seq_len(ndoc(textcorpus)))

# Assign Readable Name to Each Document
# Read the speaker of every paragraph straight from the document variables
# rather than from a summary truncated at a hard-coded number of paragraphs.
speaker <- docvars(textcorpus, "Speaker")
speak <- str_replace_all(speaker, "[^[:alnum:]]", " ")

# Lookup from the cleaned speaker string to a short ASCII key.
# NOTE(review): the keys must match `speak` exactly, including the blanks left
# behind where non-alphanumeric characters were replaced - verify them against
# the actual file names.
speaker_keys <- c(
  "Benoi t Cœure " = "Coeure",
  "Christian Noyer" = "Noyer",
  "Eugenio Domingo Solans" = "Solans",
  "Gertrude Tumpel Gugerell" = "Tumpel",
  "Jean Claude Trichet" = "Trichet",
  "Jo rg Asmussen" = "Asmussen",
  "Jose Manuel Gonza lez Pa ramo" = "Gonzalez",
  "Ju rgen Stark" = "Stark",
  "Lucas Papademos" = "Papademos",
  "Mario Draghi" = "Draghi",
  "Lorenzo Bini Smaghi" = "BiniSmaghi",
  "Willem Duisenberg" = "Duisenberg",
  "Sabine Lautenschla ger" = "Lautenschlager",
  "Yves Mersch" = "Mersch",
  "Peter Praet" = "Praet",
  "Sirkka Ha ma la inen" = "Hamalainen",
  "Vi tor Consta ncio" = "Constancio",
  "Tommaso Padoa Schioppa" = "PadoaSchioppa",
  "Otmar Issing" = "Issing"
)

# Map cleaned speaker strings to their short key (vectorised).
# Unknown names give NA; the original if/else chain returned NULL for them,
# which silently turned the sapply() result into a list.
category <- function(x) unname(speaker_keys[as.character(x)])

name <- category(speak)
if (anyNA(name)) {
  warning("Unrecognised speaker name(s): ",
          paste(unique(speak[is.na(name)]), collapse = "; "),
          call. = FALSE)
}
docvars(textcorpus, "speaker") <- name

OutDfm <- dfm(textcorpus,
              tolower = TRUE,
              remove = c(stopwords("english"), "will"),
              remove_punct = TRUE,
              remove_numbers = TRUE,
              remove_separators = TRUE,
              stem = TRUE)
OutDfm <- dfm_trim(OutDfm, min_count = 3)
OutDfm <- tfidf(OutDfm)

### CART Out-of-Sample Prediction
prediction_data <- as.data.frame(OutDfm)
prediction_data <- prediction_data[, -1]
pred <- predict(rpart.model, prediction_data)
# Select the probability of the "fiscal" class by name instead of assuming it
# is the first column.
prediction <- unname(pred[, "fiscal"])
pred <- unname(pred)

# Label a paragraph "fiscal" when its predicted probability exceeds 0.5
# (vectorised).
category <- function(x) ifelse(x > 0.5, "fiscal", "no_fiscal")
group <- category(prediction)
docvars(textcorpus, "pred") <- group

fiscalcorpus <- corpus_subset(textcorpus, pred == "fiscal")
docvars(fiscalcorpus, "Year") <- substring(docvars(fiscalcorpus, "Date"), 1, 4)

#######################################################
# Results by Quarter
#######################################################

### Subset by quarter
yq <- as.yearqtr(docvars(fiscalcorpus, "Date"), format = "%Y-%m-%d")
docvars(fiscalcorpus, "quarter") <- yq

# One corpus per quarter, stored as corpus_<year>q<quarter> (e.g. corpus_2009q1).
for (i in 2009:2016) {
  for (j in 1:4) {
    corpus_new <- paste0("corpus_", i, "q", j)
    corpus_to_assign <- corpus_subset(fiscalcorpus,
                                      quarter == paste0(i, " ", "Q", j))
    assign(corpus_new, corpus_to_assign)
  }
}

### Scaling by Quarter
setwd("~/ECB Communication by Quarter")

# Write the texts of each quarter to <year>q<quarter>.txt in the working
# directory set above.
# NOTE(review): write.table() keeps its default col.names = TRUE here, so each
# file starts with a header line for the single unnamed column - confirm
# whether that extra line is intended before changing it.
for (i in 2009:2016) { # for each year
  for (j in 1:4) {     # for each quarter
    out <- texts(get(paste0("corpus_", i, "q", j)))
    thetaOut <- paste0(i, "q", j, ".txt")
    write.table(out, thetaOut, quote = FALSE, row.names = FALSE, sep = "\t\t")
  }
}

data <- readtext("~/ECB Communication by Quarter/*.txt")
# List only the .txt files so the vector lines up with what readtext() read.
tt <- list.files(path = "~/ECB Communication by Quarter", pattern = "\\.txt$")
data$speaker <- tt
final_corpus <- corpus(data)
finalDfm <- dfm(final_corpus,
                tolower = FALSE,
                remove_punct = TRUE,
                remove_numbers = TRUE,
                stem = TRUE,
                remove = stopwords("english"))
finalDfm <- dfm_trim(finalDfm, sparsity = 0.8)

# Figure 1
dfm_austin <- convert(finalDfm, to = "austin")
ieWF <- wordfish(dfm_austin, dir = c(1, 5))
word_data <- data.frame(words = ieWF$words, weights = ieWF$beta)
word_data <- word_data[order(word_data$weights), ]

library(ggrepel)
words_to_mark <- c("fiscal", "vicious", "fragment", "inflationari",
                   "debt-to-GDP", "nexus", "discretionari", "bank-sovereign",
                   "backstop", "shock-absorb", "risk-shar", "counter-cycl",
                   "bailout", "bail-out", "resolut", "adjust", "criteria",
                   "consolid", "stimuli", "stress", "transmiss", "deterior",
                   "unsustain", "fundament", "deficit", "destabilis",
                   "budgetari", "tighter", "expect")

# Flag the words to label and which side of beta = -0.1 each word falls on.
ieWF$different_colour <- as.factor(ieWF$words %in% words_to_mark)
ieWF$left_right <- as.factor(ieWF$beta < -0.1)
ieWF2 <- data.frame(psi = ieWF$psi,
                    beta = ieWF$beta,
                    words = ieWF$words,
                    words_to_mark = ieWF$different_colour,
                    left_right = ieWF$left_right)

# All words as grey points; only the marked words are labelled, coloured by
# left_right. Column names are used inside aes() instead of `ieWF2$...`.
# The original also set legend.position before theme_hc(); only the setting
# after theme_hc() is kept, since that is the one that follows the theme.
plot <- ggplot() +
  geom_point(data = ieWF2, aes(x = beta, y = psi),
             size = .5, colour = "grey70") +
  geom_text_repel(data = ieWF2[ieWF2$words_to_mark == TRUE, ],
                  aes(x = beta, y = psi, label = words, colour = left_right),
                  size = 5) +
  scale_color_manual(values = c("red", "blue")) +
  labs(x = "Beta Coefficients", y = "Word Fixed Effects") +
  theme_hc() +
  theme(legend.position = "none")

plot +
  theme(axis.title.x = element_text(size = 16, margin = margin(20, 0, 0, 0)),
        axis.title.y = element_text(size = 16, margin = margin(0, 20, 0, 0)),
        axis.text.x = element_text(vjust = 0.5, size = 14),
        axis.text.y = element_text(vjust = 0.5, size = 14))

### Figure 2
wfm <- textmodel_wordfish(finalDfm, dir = c(1, 5))
plot_wfm <- textplot_scale1d(wfm)
score <- plot_wfm$data$theta
upper <- plot_wfm$data$upper
lower <- plot_wfm$data$lower

# Turn document labels such as "2009q1.txt" into the first day of the quarter.
doc <- sub("\\.txt$", "", as.character(plot_wfm$data$doclabels))
doc <- gsub("q", " Q", doc)
doc <- as.Date(as.yearqtr(doc, format = "%Y Q%q"))

quarter_data <- data.frame(quarter = doc,
                           score = score,
                           lower = lower,
                           upper = upper)

# ymin/ymax were swapped in the original (ymin = upper, ymax = lower); the
# bars span the same interval, but the mapping now matches the names.
plot_quarter <- ggplot(quarter_data, aes(quarter, score, group = 1)) +
  geom_point(size = 1.5, shape = 21, fill = "black") +
  geom_errorbar(data = quarter_data,
                mapping = aes(x = quarter, ymin = lower, ymax = upper),
                width = 0.05, size = 0.5) +
  ylab("Systemic Risk vs. Fiscal Discipline") +
  xlab("Year") +
  theme_hc() +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_smooth(formula = y ~ x) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y")

plot_quarter +
  theme(axis.title.x = element_text(size = 16, margin = margin(20, 0, 0, 0)),
        axis.title.y = element_text(size = 14, margin = margin(0, 20, 0, 0)),
        axis.text.x = element_text(vjust = 0.5, size = 14),
        axis.text.y = element_text(vjust = 0.5, size = 14))

#######################################################
# Results by Speaker
#######################################################

### Results for Table A2

# N. Documents by Board Member (taking into account only crisis period)
# Year is stored as a four-character string, so `Year > 2008` is evaluated as
# a string comparison ("2009" > "2008"), which orders four-digit years
# correctly.
docvars(mycorpusECB, "Year") <- substring(docvars(mycorpusECB, "Date"), 1, 4)
mycorpusECB_crisis <- corpus_subset(mycorpusECB, Year > 2008)
table(docvars(mycorpusECB_crisis, "Speaker"))

# Transform corpus into paragraphs
# (rebuilt from mycorpusECB so that the paragraphs carry the Year variable)
textcorpus <- corpus_reshape(mycorpusECB, to = "paragraphs")
docvars(textcorpus, "id") <- as.numeric(seq_len(ndoc(textcorpus)))

# N. Paragraphs by Board Member
textcorpus_crisis <- corpus_subset(textcorpus, Year > 2008)
table(docvars(textcorpus_crisis, "Speaker"))

# N.
# Number of fiscal paragraphs by Board Member
# Year is stored as a four-character string, so `Year > 2008` is evaluated as
# a string comparison, which orders four-digit years correctly.
fiscalcorpus_crisis <- corpus_subset(fiscalcorpus, Year > 2008)
table(docvars(fiscalcorpus_crisis, "Speaker"))

# Subset by Speaker
# Creates one corpus per Board Member named corpus_<lower-case key>, e.g.
# corpus_coeure, corpus_binismaghi, corpus_padoaschioppa.
all_member_keys <- c("Coeure", "Noyer", "Solans", "Tumpel", "Trichet",
                     "Asmussen", "Gonzalez", "Stark", "Papademos", "Draghi",
                     "BiniSmaghi", "Issing", "Praet", "Lautenschlager",
                     "Hamalainen", "PadoaSchioppa", "Constancio",
                     "Duisenberg", "Mersch")
for (member_key in all_member_keys) {
  assign(paste0("corpus_", tolower(member_key)),
         corpus_subset(fiscalcorpus_crisis, speaker == member_key))
}

### Scaling by Speaker
setwd("~/ECB Communication by Board Member")

# File stem (names) -> display label (values) for the members that are scaled.
# The original script wrote only twelve files but labelled thirteen documents
# in Figure 3 (Papademos had a label and no file), so Papademos is written
# here as well and the labels are looked up from the file names further down.
member_labels <- c(
  Asmussen = "Asmussen",
  Binismaghi = "Bini Smaghi",
  Coeure = "Coeuré",
  Constancio = "Constâncio",
  Draghi = "Draghi",
  Gonzalez = "González-Páramo",
  Lautenschlager = "Lautenschläger",
  Mersch = "Mersch",
  Papademos = "Papademos",
  Praet = "Praet",
  Stark = "Stark",
  Trichet = "Trichet",
  Tumpel = "Tumpel-Gugerell"
)

# One text file per member in the working directory set above.
# NOTE(review): write.table() keeps its default col.names = TRUE, so every
# file starts with a header line for the single unnamed column - confirm
# whether that is intended before changing it.
for (stem in names(member_labels)) {
  out <- texts(get(paste0("corpus_", tolower(stem))))
  thetaOut <- paste0(stem, ".txt")
  write.table(out, thetaOut, quote = FALSE, row.names = FALSE, sep = "\t\t")
}

data <- readtext("~/ECB Communication by Board Member/*.txt")
# List only the .txt files so the vector lines up with what readtext() read.
tt <- list.files(path = "~/ECB Communication by Board Member",
                 pattern = "\\.txt$")
# [A-Za-z] instead of [A-z]: the latter also matches the punctuation
# characters that sit between "Z" and "a".
data$speaker <- str_extract(tt, "[A-Za-z]+")
final_corpus <- corpus(data)
finalDfm <- dfm(final_corpus,
                tolower = FALSE,
                remove_punct = TRUE,
                remove_numbers = TRUE,
                stem = TRUE,
                remove = stopwords("english"))
finalDfm <- dfm_trim(finalDfm, sparsity = 0.8)

### Figure 3
wfm <- textmodel_wordfish(finalDfm, dir = c(5, 1))
plot_wfm <- textplot_scale1d(wfm)
score <- plot_wfm$data$theta
upper <- plot_wfm$data$upper
lower <- plot_wfm$data$lower

# Derive each display label from the document's own file name rather than
# from a positional list, so labels cannot drift out of line with the scores.
stems <- sub("\\.txt$", "", basename(as.character(plot_wfm$data$doclabels)))
doc <- unname(member_labels[stems])
doc[is.na(doc)] <- stems[is.na(doc)] # unknown files keep their file stem

member_data <- data.frame(member = doc,
                          score = score,
                          lower = lower,
                          upper = upper)
member_data <- arrange(member_data, desc(score))
member_data$member <- factor(member_data$member,
                             levels = union(unname(member_labels), doc))

# ymin/ymax were swapped in the original (ymin = upper, ymax = lower); the
# bars span the same interval, but the mapping now matches the names.
plot_speaker <- ggplot(member_data, aes(x = reorder(member, score), y = score)) +
  geom_point(size = 2.5, shape = 21, fill = "black") +
  geom_errorbar(data = member_data,
                mapping = aes(x = member, ymin = lower, ymax = upper),
                width = 0.05, size = 0.5) +
  ylab("Systemic Risk vs. Fiscal Discipline") +
  xlab("ECB Executive Board Member") +
  coord_flip() +
  theme_hc() +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(axis.title.x = element_text(size = 16, margin = margin(15, 0, 0, 0)),
        axis.title.y = element_text(size = 16, margin = margin(0, 15, 0, 0)),
        axis.text.x = element_text(vjust = 0.5, size = 14),
        axis.text.y = element_text(vjust = 0.5, size = 14)) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "lightgrey")

# Displayed with larger text than the sizes stored in plot_speaker itself.
plot_speaker +
  theme(axis.title.x = element_text(size = 20, margin = margin(15, 0, 0, 0)),
        axis.title.y = element_text(size = 20, margin = margin(0, 15, 0, 0)),
        axis.text.x = element_text(vjust = 0.5, size = 18),
        axis.text.y = element_text(vjust = 0.5, size = 18))

#######################################################
# Results by Quarter and Speaker
#######################################################

### Subset by Quarter and Speaker
yq <- as.yearqtr(docvars(fiscalcorpus, "Date"), format = "%Y-%m-%d")
docvars(fiscalcorpus, "quarter") <- yq

# One corpus per quarter and member, stored as corpus_<year>Q<quarter>_<key>
# (e.g. corpus_2011Q4_Draghi).
quarter_members <- c("Trichet", "Draghi", "Stark", "Asmussen", "Coeure",
                     "BiniSmaghi", "Mersch", "Constancio", "Papademos",
                     "Praet", "Gonzalez", "Lautenschlager", "Tumpel")
for (i in 2009:2016) {
  for (j in 1:4) {
    for (member_key in quarter_members) {
      corpus_new <- paste0("corpus_", i, "Q", j, "_", member_key)
      corpus_to_assign <- corpus_subset(
        fiscalcorpus,
        quarter == paste0(i, " ", "Q", j) & speaker == member_key
      )
      assign(corpus_new, corpus_to_assign)
    }
  }
}

### Scaling
### Scaling by Quarter and Speaker
setwd("~/ECB Communication by Quarter and Speaker")

# Write the texts of every quarter/member corpus (corpus_<year>Q<q>_<key>) to
# <year>Q<q>_<key>.txt in the working directory set above.
# NOTE(review): write.table() keeps its default col.names = TRUE, so every
# file starts with a header line for the single unnamed column - confirm
# whether that is intended before changing it.
quarter_members <- c("Trichet", "Draghi", "Stark", "Asmussen", "Coeure",
                     "BiniSmaghi", "Mersch", "Constancio", "Papademos",
                     "Praet", "Gonzalez", "Lautenschlager", "Tumpel")
for (i in 2009:2016) {
  for (j in 1:4) {
    for (member_key in quarter_members) {
      out <- texts(get(paste0("corpus_", i, "Q", j, "_", member_key)))
      thetaOut <- paste0(i, "Q", j, "_", member_key, ".txt")
      write.table(out, thetaOut, quote = FALSE, row.names = FALSE, sep = "\t\t")
    }
  }
}

data <- readtext("~/ECB Communication by Quarter and Speaker/*.txt")
# List only the .txt files so the vector lines up with what readtext() read.
tt <- list.files(path = "~/ECB Communication by Quarter and Speaker",
                 pattern = "\\.txt$")
data$speaker <- tt
final_corpus <- corpus(data)

# Summarise every document instead of a hard-coded 416.
sentences <- summary(final_corpus, n = ndoc(final_corpus))$Sentences
docvars(final_corpus, "Sentences") <- paste(sentences)
# Keep documents with more than one sentence. The count is stored as text, so
# compare it numerically rather than as a string.
final_quarter_member <- corpus_subset(final_corpus, as.numeric(Sentences) > 1)

finalDfm <- dfm(final_quarter_member,
                tolower = FALSE,
                remove_punct = TRUE,
                remove_numbers = TRUE,
                stem = TRUE,
                remove = stopwords("english"))
finalDfm <- dfm_trim(finalDfm, sparsity = 0.98)

### Figure 4
wfm <- textmodel_wordfish(finalDfm, dir = c(1, 5))
plot_wfm <- textplot_scale1d(wfm)
score <- plot_wfm$data$theta
upper <- plot_wfm$data$upper
lower <- plot_wfm$data$lower

# Document labels look like "2011Q4_Draghi.txt". Strip the extension and only
# the leading century (the original removed every "20" in the label), leaving
# "11Q4_Draghi": two-digit year, quarter, underscore, member key.
doc <- sub("\\.txt$", "", as.character(plot_wfm$data$doclabels))
doc <- sub("^20", "", doc)
member <- sub("^[^_]*_", "", doc)
year <- substr(doc, 1, 2)
quarter <- substr(doc, 3, 4)

quarter_speaker_data <- data.frame(member = member,
                                   quarter = quarter,
                                   year = as.numeric(paste0("20", year)),
                                   score = score,
                                   lower = lower,
                                   upper = upper,
                                   stringsAsFactors = FALSE)

# Member key -> display label with accents; keys without an entry are
# returned unchanged.
display_names <- c(
  Coeure = "Coeuré",
  BiniSmaghi = "Bini Smaghi",
  Lautenschlager = "Lautenschläger",
  Constancio = "Constâncio",
  Tumpel = "Tumpel-Gugerell",
  Gonzalez = "González Páramo"
)

# Translate member keys into display labels (vectorised).
category <- function(x) {
  x <- as.character(x)
  hit <- match(x, names(display_names))
  ifelse(is.na(hit), x, unname(display_names[hit]))
}

name <- category(quarter_speaker_data$member)
quarter_speaker_data$member <- as.factor(unname(name))

# First day of each quarter, used as the x position in the plots.
quarter_speaker_data$quarter_year <- as.yearqtr(
  paste(quarter_speaker_data$year, quarter_speaker_data$quarter, sep = " ")
)
quarter_speaker_data$quarter_year <- as.Date(quarter_speaker_data$quarter_year)

# ymin/ymax were swapped in the original (ymin = upper, ymax = lower); the
# bars span the same interval, but the mapping now matches the names.
# The dashed vertical line marks 2012-07-01.
plot_quarter_speaker <- ggplot(quarter_speaker_data, aes(quarter_year, score)) +
  geom_point(size = 1.5, shape = 21, fill = "black") +
  scale_y_continuous(breaks = seq(-2, 2, by = 1)) +
  geom_errorbar(data = quarter_speaker_data,
                mapping = aes(x = quarter_year, ymin = lower, ymax = upper),
                width = 0.05, size = 0.5) +
  ylab("Systemic Risk vs. Fiscal Discipline") +
  xlab("Year") +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_smooth(formula = y ~ x) +
  geom_vline(xintercept = as.numeric(as.Date("2012-07-01")),
             linetype = "dashed", colour = "lightgrey")

# One panel per member; `ncol` is spelled out instead of the partially
# matched `nc`.
plot_quarter_speaker +
  facet_wrap(~member, ncol = 5, scales = "free_x") +
  theme_hc() +
  theme(plot.margin = unit(c(0.5, 0.5, 1, 0.6), "cm")) +
  theme(axis.title.x = element_text(size = 20, margin = margin(20, 0, 0, 0)),
        axis.title.y = element_text(size = 20, margin = margin(0, 20, 0, 0)),
        axis.text.x = element_text(vjust = 0.5, size = 10),
        axis.text.y = element_text(vjust = 0.5, size = 18))

##### Figure 5
# One row per member/quarter of interest. The Draghi row is now restricted to
# Q4 like the others: its label below is assigned by position as
# "Draghi - 2011 Q4", so any additional 2011 row would shift every label.
quarter_speaker_draghi <- quarter_speaker_data[
  quarter_speaker_data$year == 2011 &
    quarter_speaker_data$member == "Draghi" &
    quarter_speaker_data$quarter == "Q4", ]
quarter_speaker_trichet <- quarter_speaker_data[
  quarter_speaker_data$year == 2011 &
    quarter_speaker_data$member == "Trichet" &
    quarter_speaker_data$quarter == "Q4", ]
quarter_speaker_stark <- quarter_speaker_data[
  quarter_speaker_data$year == 2011 &
    quarter_speaker_data$member == "Stark" &
    quarter_speaker_data$quarter == "Q4", ]
quarter_speaker_asmussen <- quarter_speaker_data[
  quarter_speaker_data$year == 2012 &
    quarter_speaker_data$member == "Asmussen" &
    quarter_speaker_data$quarter == "Q1", ]

# Row order here (Trichet, Draghi, Stark, Asmussen) determines which label
# each row receives in the positional assignments that follow.
differences_dt <- rbind(quarter_speaker_trichet,
                        quarter_speaker_draghi,
                        quarter_speaker_stark,
                        quarter_speaker_asmussen)
differences_dt$member <- as.character(differences_dt$member)
differences_dt$member[1] <- "Trichet - 2011 Q4"
# Figure 5 (continued): label rows 2-4 of differences_dt by position. Row 1 is
# labelled just before this point; the rows follow the rbind() order used to
# build differences_dt (Trichet, Draghi, Stark, Asmussen).
differences_dt$member[2] <- "Draghi - 2011 Q4"
differences_dt$member[3] <- "Stark - 2011 Q4"
differences_dt$member[4] <- "Asmussen - 2012 Q1"
# Use the current row order as the factor level order.
differences_dt$member <- factor(differences_dt$member, levels = differences_dt$member)
# Point estimate with an error bar per row, drawn with flipped coordinates.
# NOTE(review): ymin is mapped to `upper` and ymax to `lower` - the names are
# swapped, although the bar covers the same interval either way.
# The discrete axis limits are set to the factor levels in reverse order.
graph_differences <- ggplot(data = differences_dt, aes (x= member, y= score)) + geom_point(size=4, shape=21, fill="black") + coord_flip() + theme_hc() + geom_errorbar(aes (x = member, ymin = upper, ymax = lower), width = 0.1, size = 0.25) + ylab("Systemic Risk vs. Fiscal Discipline") + xlab("") + theme(axis.title.y = element_text(margin=margin(0,20,0,0)), axis.title.x = element_text(margin=margin(20,0,0,0))) + theme(plot.title = element_text(hjust= 0.5, vjust=1, size=18, margin=margin(0,0,20,0))) + scale_x_discrete(limits = rev(levels(differences_dt$member)))
# Display the figure with explicit axis title and axis text sizes.
graph_differences + theme(axis.title.x = element_text(size=16, margin=margin(20,0,0,0)), axis.title.y = element_text(size=16, margin=margin(0,20,0,0)), axis.text.x = element_text(vjust=0.5, size=14), axis.text.y = element_text(vjust=0.5, size=14))
############### T-test #################
# Rows of quarter_speaker_data with year before 2011.
quarter_speaker_data_0910 <- quarter_speaker_data[quarter_speaker_data$year<2011,]
# Rows with quarter_year on or after 2011-01-01, combined with a further
# condition on quarter_year; the statement continues beyond this point.
quarter_speaker_data_1112 <- quarter_speaker_data[quarter_speaker_data$quarter_year>=as.Date("2011-01-01") & quarter_speaker_data$quarter_year