codebook_vignette_dir <- file.path(tempdir(), "codebook_vignette")
if(!dir.exists(codebook_vignette_dir)) {
dir.create(codebook_vignette_dir)
}
This code created the /tmp/RtmpWUnMaO/codebook_vignette
directory (the temporary path will differ in your R session). You can
replicate this vignette with the code above, or you can use your own
directory for trying the codebook_create() function.
The ‘eurobarometer’ R package contains two very small subsamples
of Eurobarometer microdata files. You cannot work in the
system.file() directory of the package, so we will copy the contents
of this directory to the temporary folder.
sample_files <- dir(system.file("extdata", package = "eurobarometer"))
sample_files
#> [1] "ZA4529_sample.sav" "ZA5933_sample.sav"
sytem_file_paths <- file.path(
system.file("extdata", package = "eurobarometer"),
dir(system.file("extdata", package = "eurobarometer"))
)
for (i in seq_along(sample_files)) {
file.copy(sytem_file_paths[i], file.path(codebook_vignette_dir, sample_files[i]),
overwrite=TRUE)
}
The resulting files can be found here:
sample_files <- file.path(codebook_vignette_dir, dir(codebook_vignette_dir))
sample_files
#> [1] "/tmp/RtmpWUnMaO/codebook_vignette/ZA4529_sample.sav"
#> [2] "/tmp/RtmpWUnMaO/codebook_vignette/ZA5933_sample.sav"

Create A Survey Codebook
Let’s start with ZA5933_sample.sav, because it is a
better labelled source file.
sample_data <- read_sav_gesis(
file = sample_files[2])
Let’s see the file’s contents:
simple_codebook <- codebook_create(sample_data, val_labels = FALSE, freq = FALSE)
simple_codebook
#> Codebook for Eurobarometer 82.4 (November-December 2014) [10.4232/1.13044]
#> survey ZACAT var_name_orig var_label_orig
#> 1 10.4232/1.13044 ZA5933 uri <NA>
#> 2 10.4232/1.13044 ZA5933 studyno1 ARCHIVE STUDY NUMBER - DISTRIBUTOR
#> 3 10.4232/1.13044 ZA5933 studyno2 ARCHIVE STUDY NUMBER - PUBLISHER
#> 4 10.4232/1.13044 ZA5933 doi <NA>
#> 5 10.4232/1.13044 ZA5933 edition DATASET EDITION
#> 6 10.4232/1.13044 ZA5933 survey SURVEY IDENTIFICATION
#> 7 10.4232/1.13044 ZA5933 caseid <NA>
#> 8 10.4232/1.13044 ZA5933 uniqid <NA>
#> 9 10.4232/1.13044 ZA5933 tnscntry ORIGINAL TNS COUNTRY/SAMPLE ID
#> 10 10.4232/1.13044 ZA5933 country COUNTRY/SAMPLE ID (SERIES STANDARD)
#>
#> ... 17 further observations.
The simplified codebook is returned as a dataset, an S3
class of the dataset
package that adds important structural and referential metadata to an R
data.frame.
The get_zacat_data_url(simple_codebook$ZACAT[1]) command
will return the URL of the original dataset, https://search.gesis.org/research_data/ZA5933.
The entire codebook of the sample file would be very large to show,
so we subset the sample_data and create a more detailed
codebook for only the d25 and d60
variables.
full_codebook <- codebook_create(sample_data[, c("d25", "d60")],
val_labels = TRUE,
freq = TRUE)
full_codebook
#> Codebook for Eurobarometer 82.4 (November-December 2014) (subset) [10.4232/1.13044]
#> survey var_name_orig var_label_orig
#> 1 10.4232/1.13044 d25 TYPE OF COMMUNITY
#> 2 10.4232/1.13044 d25 TYPE OF COMMUNITY
#> 3 10.4232/1.13044 d25 TYPE OF COMMUNITY
#> 4 10.4232/1.13044 d25 TYPE OF COMMUNITY
#> 5 10.4232/1.13044 d60 DIFFICULTIES PAYING BILLS - LAST YEAR
#> 6 10.4232/1.13044 d60 DIFFICULTIES PAYING BILLS - LAST YEAR
#> 7 10.4232/1.13044 d60 DIFFICULTIES PAYING BILLS - LAST YEAR
#> 8 10.4232/1.13044 d60 DIFFICULTIES PAYING BILLS - LAST YEAR
#> val_code_orig val_label_orig
#> 1 1 Rural area or village
#> 2 2 Small or middle sized town
#> 3 3 Large town
#> 4 8 DK
#> 5 1 Most of the time
#> 6 2 From time to time
#> 7 3 Almost never/never
#> 8 7 Refusal (SPONT.)
full_codebook[full_codebook$var_name_orig=="d25", c("val_label_orig")]
#> [1] "Rural area or village" "Small or middle sized town"
#> [3] "Large town" "DK"

Work with Multiple Surveys
# Create the joint codebook for multiple files
joint_codebook <- codebook_create(directory = codebook_vignette_dir,
val_labels = FALSE,
freq = FALSE)
#> Reading ZA4529_sample.sav
#> Reading ZA5933_sample.sav
#> Warning in data.frame(survey = rep(as.character(unlist(identifier)), df_rows), :
#> NAs introduced by coercion
# Create a random row selection
set.seed(2022)
random_rows <- c(1, sort(round(runif(11, 2, nrow(joint_codebook)),0)))
# Print only the 12 rows
joint_codebook[random_rows, ]
#> Codebook for Multiple Surveys (subset)
#> survey ZACAT var_name_orig
#> 1 10.4232/1.10983 ZA4529 uri
#> 12 10.4232/1.10983 ZA4529 v3
#> 24 10.4232/1.10983 ZA4529 v6
#> 34 10.4232/1.10983 ZA4529 v95
#> 216 10.4232/1.10983 ZA4529 v100
#> 316 10.4232/1.10983 ZA4529 v724
#> 518 10.4232/1.13044 ZA5933 tnscntry
#> 11101 10.4232/1.13044 ZA5933 d7
#> 262 10.4232/1.13044 ZA5933 nuts
#> 3311 10.4232/1.13044 ZA5933 nuts
#> var_label_orig val_code_orig
#> 1 <NA> NA
#> 12 <NA> NA
#> 24 NATION - ALL SAMPLES 24
#> 34 QA4 CULTURAL ACTIVITIES: CINEMA 3
#> 216 QA4 CULTURAL ACTIVITIES: HIST MONUMENTS 2
#> 316 D8 AGE EDUCATION 97
#> 518 ORIGINAL TNS COUNTRY/SAMPLE ID 6
#> 11101 MARITAL STATUS 11
#> 262 REGION - NUTS CODES NA
#> 3311 REGION - NUTS CODES NA
#> val_label_orig
#> 1 <NA>
#> 12 <NA>
#> 24 Lithuania
#> 34 3-5 times
#> 216 1-2 times
#> 316 No full-time education
#> 518 SUOMI
#> 11101 Divorced/Separated: living without children
#> 262 Thueringen
#> 3311 South East [England]
#>
#> ... 2 further observations.