1 year ago

#341784

test-img

Sunshine_student

R: How to add the number of valid observations to a stacked bar chart with proportions?

I'm trying to add n (the number of valid answers) to a stacked bar plot with stat_n_text(). However, the final data frame that I pass to ggplot already contains only the proportions of the 4 answer options (in this case), so n is shown as 4 instead of, e.g., 20. Do you know how to include the original n in the ggplot? (Here is an example of the graph.)

enter image description here

For P1, n should be e.g. 20. Here is what the data looks like (a subset of all observations):

# Example responses: ten respondents (rows) by five items P1-P5 (columns).
# P5 contains one missing answer (NA).
data <- structure(
  list(
    P1 = c(3, 4, 4, 4, 4, 3, 4, 4, 4, 4),
    P2 = c(4, 3, 4, 3, 4, 4, 3, 4, 4, 4),
    P3 = c(4, 3, 4, 4, 4, 3, 4, 4, 4, 4),
    P4 = c(1, 2, 4, 3, 4, 3, 4, 3, 4, 4),
    P5 = c(3, 3, 4, 4, 4, 3, 3, NA, 4, 4)
  ),
  row.names = c(
    149L, 150L, 151L, 152L, 153L,
    154L, 155L, 166L, 167L, 168L
  ),
  class = "data.frame"
)

Data frame for the labels (titles, xlab, etc.), itemDatasetSub:
 # Item metadata used to label the plot: one row per item, holding the item
 # number (No), the item wording (Item), the answer labels (Antwort0..Antwort7,
 # NA where an answer code is not used) and the plot/legend/axis titles.
 # Bound to a name here because the plotting function refers to
 # `itemDatasetSub`.
 itemDatasetSub <- structure(
   list(
     No = c("P1", "P2", "P3", "P4", "P5"),
     Language = rep("DE", 5),
     Item = c(
       "Die/der direkte Vorgesetzte unterstützt das Spitexpersonal.",
       "Die Vorgesetzten nutzen Fehler (z. B. in der Pflege und Betreuung) zum gemeinsamen Lernen und nicht zum Kritisieren.",
       "Die/der direkte Vorgesetzte ist eine kompetente Führungsperson.",
       "Es gibt Lob und Anerkennung für eine gute Arbeitsleistung.",
       "Die/der direkte Vorgesetzte steht beim Treffen von Entscheidungen hinter dem Personal, auch wenn diese im Konflikt mit anderen Berufsgruppen oder Klienten/-innen stehen."
     ),
     Antwort0 = rep(NA_character_, 5),
     Antwort1 = rep("Stimme überhaupt nicht zu", 5),
     Antwort2 = rep("Stimme eher nicht zu", 5),
     Antwort3 = rep("Stimme eher zu", 5),
     Antwort4 = rep("Stimme voll und ganz zu", 5),
     Antwort5 = rep(NA_character_, 5),
     Antwort6 = rep(NA_character_, 5),
     Antwort7 = rep(NA_character_, 5),
     `Titel der Grafik` = c("Führung", NA, NA, NA, NA),
     `Titel Legende` = rep("Antwortoptionen", 5),
     `Titel xlab` = c("Items", NA, NA, NA, NA),
     `Titel ylab` = rep("% Zustimmung", 5),
     Einleitungstext = rep(NA_character_, 5)
   ),
   row.names = c(NA, -5L),
   class = c("tbl_df", "tbl", "data.frame")
 )
 

Here is the function (with n pasted into the item labels as a character string):

# Items to plot; needed when working with the complete dataset.
itemNoVec <- paste0("P", 1:5)
#' Stacked bar chart of answer proportions with the valid n in each item label
#'
#' For every item in `itemNoVec`, the share of each answer option among the
#' non-missing responses in `df` is computed and drawn as one horizontal
#' stacked bar per item. The number of valid (non-NA) answers is counted on
#' the raw data and appended to the item label, so it reflects the real n and
#' not the number of rows of the summarised plotting data (which is what a
#' `stat_n_text()` layer would count, i.e. the number of answer options).
#'
#' @param itemNoVec Character vector of item numbers. Each must be a column of
#'   `df`; item wording is looked up via `itemText$No`.
#' @param language Language code. Not used by the function body at present;
#'   kept so existing calls remain valid.
#' @param restrict_level Minimum number of valid answers an item needs; items
#'   below it get `NA` percentages and therefore no bar.
#' @param df Data frame of raw answer codes, one column per item.
#' @param itemText Data frame with the item metadata: `No`, `Item`, the answer
#'   labels in columns 4 to 11, and the columns `Titel der Grafik`,
#'   `Titel Legende` and `Titel ylab`.
#' @return A ggplot object.
Stacked_Bar_5 <- function(itemNoVec,
                          language = "DE", # German
                          restrict_level = 3, # under 3 do not show results
                          df = data,
                          itemText = itemDatasetSub
) {
  missingItems <- setdiff(itemNoVec, names(df))
  if (length(missingItems) > 0) {
    stop(
      "Items not found in `df`: ", paste(missingItems, collapse = ", "),
      call. = FALSE
    )
  }

  # Valid (non-NA) answers per item, counted on the raw data.
  nValid <- vapply(
    itemNoVec,
    function(item) sum(!is.na(df[[item]])),
    integer(1),
    USE.NAMES = FALSE
  )

  # Rows of `itemText` belonging to the requested items (matched by item
  # number rather than by position, so any subset/order of items works).
  textRows <- match(itemNoVec, itemText$No)
  metaRows <- textRows[!is.na(textRows)]
  if (length(metaRows) == 0) {
    metaRows <- seq_len(nrow(itemText))
  }

  # Answer labels: the non-NA entries of columns 4 to 11.
  ansVecRaw <- as.vector(as.matrix(itemText[metaRows[1], 4:11]))
  ansVecSingle <- ansVecRaw[!is.na(ansVecRaw)]
  answN <- length(ansVecSingle)

  # Answer codes start at 1 when the first answer column is unused, else at 0.
  answ1 <- if (is.na(ansVecRaw[1])) 1 else 0
  ansNoVec <- (0:7 + answ1)[seq_len(answN)]

  # Percentage of each answer code per item (NA when below restrict_level).
  resultVec <- unlist(lapply(seq_along(itemNoVec), function(i) {
    if (nValid[i] < restrict_level) {
      return(rep(NA_real_, answN))
    }
    values <- df[[itemNoVec[i]]]
    counts <- vapply(
      ansNoVec,
      function(code) sum(values == code, na.rm = TRUE),
      numeric(1)
    )
    counts / nValid[i] * 100
  }))

  # Item labels: item wording (item number as fallback) plus the valid n.
  itemLabels <- as.character(itemText$Item[textRows])
  noText <- is.na(itemLabels)
  itemLabels[noText] <- itemNoVec[noText]
  itemLabels <- paste0(itemLabels, "(n= ", nValid, ")")

  # Long-format plotting data: one row per item x answer option.
  z <- data.frame(
    itemNoVecMulti = as_factor(rep(itemLabels, each = answN)),
    ansVecMulti = rep(ansVecSingle, times = length(itemNoVec)),
    resultVec = resultVec
  )
  z$ansFill <- fct_rev(as_factor(z$ansVecMulti))

  # First non-missing entry of a title column among the selected rows, or
  # NULL when there is none.
  first_text <- function(column) {
    values <- itemText[[column]][metaRows]
    values <- values[!is.na(values)]
    if (length(values) > 0) values[[1]] else NULL
  }

  # Stacked bar colors
  colVec <- hcl.colors(n = answN, palette = "Blue-Red 2")

  ggplot(
    z,
    aes(fill = ansFill, y = resultVec, x = fct_rev(itemNoVecMulti))
  ) +
    geom_bar(position = "fill", stat = "identity", width = 0.8) +
    scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
    coord_flip() +
    ggtitle(first_text("Titel der Grafik")) +
    xlab("") +
    ylab(first_text("Titel ylab")) +
    theme_minimal() +
    geom_label_repel(
      aes(y = resultVec, label = scales::percent(resultVec / 100, accuracy = 0.1)),
      size = 2.5, force = 0.001, vjust = "center", hjust = "center",
      position = position_fill(vjust = 0.5), min.segment.length = Inf,
      xlim = c(-1, Inf), ylim = c(-0.05, Inf)
    ) +
    scale_fill_manual(name = first_text("Titel Legende"), values = colVec) +
    guides(fill = guide_legend(
      override.aes = aes(label = ""),
      reverse = TRUE,
      direction = "horizontal", # legend direction
      title.position = "top", # position of title
      nrow = 2
    )) +
    scale_x_discrete(labels = function(x) str_wrap(x, width = 60, indent = 0))
}

I'd really appreciate any help :) Thank you so much.

r

dataframe

ggplot2

stacked-chart

0 Answers

Your Answer

Accepted video resources