/*---------------------------------------------------------------------------
  Labels and value formats for the pulse data.

  Reads  : BIO.PULSECSV
  Writes : BIO.PULSE_STEP2 (same rows, with variable labels and formats)

  NOTE(review): the BIO libref is not assigned anywhere in this script, so it
  is assumed to be assigned before this code runs - confirm.
  NOTE(review): PROC FORMAT below has no LIBRARY= option, so the formats are
  presumably stored in the session's temporary catalog and must be re-created
  in any later session that reads BIO.PULSE_STEP2 - confirm.
---------------------------------------------------------------------------*/


/*-------------------- Part 1: inspect the original data --------------------*/

/* Print only the first 5 observations */
PROC PRINT DATA=BIO.PULSECSV (OBS=5);
RUN;

/* Describe the dataset; VARNUM lists variables in dataset order */
PROC CONTENTS DATA=BIO.PULSECSV VARNUM;
RUN;

/* Frequency tables for the categorical variables, still showing raw codes */
PROC FREQ DATA=BIO.PULSECSV;
    TABLES GENDER SMOKES ALCOHOL EXERCISE TRT;
RUN;


/*-------------------- Part 2: define the code translations -----------------*/

/* PROC FORMAT only DEFINES the translations; it does not attach them to any
   variable. A format defined here can be reused across datasets and across
   several variables. Each VALUE statement below defines one format. */
PROC FORMAT;

    /* GDR: used for GENDER */
    VALUE GDR
        1 = "Male"
        2 = "Female";

    /* YN: shared by SMOKES and ALCOHOL */
    VALUE YN
        1 = "Yes"
        2 = "No";

    /* EXER: used for EXERCISE */
    VALUE EXER
        1 = "High"
        2 = "Moderate"
        3 = "Low";

    /* TREAT: used for TRT */
    VALUE TREAT
        1 = "Ran"
        2 = "Sat";

RUN;


/*-------------------- Part 3: build the labelled dataset -------------------*/

/* DATA names the new dataset and SET names the dataset it is read from.
   Keeping all data manipulation in a single DATA step is usually best, but
   building it up in stages and checking each run is fine too. */
DATA BIO.PULSE_STEP2;
    SET BIO.PULSECSV;

    /* Descriptive labels: one variable = "label" pair per line, and a single
       semicolon closes the whole LABEL statement */
    LABEL
        HEIGHT   = "Height (cm)"
        WEIGHT   = "Weight (kg)"
        AGE      = "Age (years)"
        GENDER   = "Gender"
        SMOKES   = "Regular smoker?"
        ALCOHOL  = "Regular drinker?"
        EXERCISE = "Frequency of exercise"
        TRT      = "Whether the student ran or sat "
        PULSE1   = "First pulse measurement (bpm)"
        PULSE2   = "Second pulse measurement (bpm)"
        YEAR     = "Year of class";

    /* Attach the formats: list one or more variables, then the format name
       ending in a period. One FORMAT statement can cover many variables and
       runs until its semicolon. */
    FORMAT
        GENDER         GDR.
        SMOKES ALCOHOL YN.
        EXERCISE       EXER.
        TRT            TREAT.;
RUN;


/*-------------------- Part 4: inspect the new data -------------------------*/

/* The first 5 observations now print translated values instead of codes */
PROC PRINT DATA=BIO.PULSE_STEP2 (OBS=5);
RUN;

/* The variable list now shows the attached format names */
PROC CONTENTS DATA=BIO.PULSE_STEP2 VARNUM;
RUN;

/* The same frequency tables, now with labels and translated values */
PROC FREQ DATA=BIO.PULSE_STEP2;
    TABLES GENDER SMOKES ALCOHOL EXERCISE TRT;
RUN;