/* Sugi29writeReaderForDDIFile.sas - write a SAS program that reads a file */
/* described by a DDI file */
/* example for SUGI29 paper */
/* Larry Hoyle Sept, 2003 */

/* Overview of what this program does:                                     */
/*   1. exposes the DDI codebook (XML) as tables through the XML libname   */
/*      engine and an XMLMap                                               */
/*   2. writes study-level metadata into a comment at the top of the       */
/*      generated program                                                  */
/*   3. writes a PROC FORMAT with one format per variable that has         */
/*      labeled categories                                                 */
/*   4. writes a DATA step (INFILE, INPUT, missing-value recodes, LABEL    */
/*      and FORMAT statements) that reads the raw data file                */

/* targetPath: raw data file that the generated program will read          */
/* readerPath: where the generated SAS program is written                  */
%let targetPath=D:\data\icpsr\data\6084\da06084.txt;
%let readerPath=D:\projects\sugs\sugi29\sascode\readda06084.sas;
*';

/************************************************************
 * Generated by XMLAtlas, v. 9.0.1
 ************************************************************/

/*
 * ENVIRONMENT
 */
filename noStyles 'D:\projects\sugs\sugi29\xmlCodebooks\06084noStylesheet.xml';
filename SXLEMAP 'D:\projects\sugs\sugi29\sascode\icpsr06084.map';
libname noStyles xml xmlmap=SXLEMAP access=READONLY;

/*
 * CATALOG
 proc datasets lib=noStyles; run;
 */

/*
 * Resultant File layouts
 */
/*
proc contents data=noStyles.titlStmt varnum; run;
proc contents data=noStyles.sumDscr varnum; run;
proc contents data=noStyles.method varnum; run;
proc contents data=noStyles.fileTxt varnum; run;
proc contents data=noStyles.DataDscrVar varnum; run;
proc contents data=noStyles.DataDscrVarInvalrng varnum; run;
proc contents data=noStyles.DatadscrVarCatgry varnum; run;
*/
/*
proc print data=noStyles.titlStmt; run;
proc print data=noStyles.sumDscr; run;
proc print data=noStyles.method; run;
proc print data=noStyles.fileTxt; run;
proc print data=noStyles.DataDscrVar; run;
proc print data=noStyles.DataDscrVarInvalrng; run;
proc print data=noStyles.DatadscrVarCatgry; run;
*/

/* generate the PUT statements for the metadata comment section          */
/* tabs are turned to blanks, successive blanks are replaced by one      */
/* "star slash" is turned to "star underscore slash" so the text cannot  */
/* close the comment it is written into                                  */
/*   v  = source variable                                                */
/*   ve = work variable that receives the cleaned text                   */
/*   h  = heading written on the line above the text                     */
/* nothing is written when the cleaned text is blank                     */
%macro putInCmnt(v=name, ve=vEdited, h=heading);
  &ve=compbl(tranwrd(translate(&v,' ','09'x),'*/','*_/'));
  if &ve ne ' ' then put / "&h" / ' ' &ve;
%mend putInCmnt;

/* for strings to be written in single quotes any embedded               */
/* single quotes must be doubled                                         */
/*   vs = source variable, vd = destination variable                     */
/* TRANWRD replaces the earlier RXPARSE/RXCHANGE pair, which compiled a  */
/* new pattern for every observation and never released it               */
%macro pairSingle(vs=a, vd=b);
  &vd = tranwrd(&vs, "'", "''");
%mend pairSingle;

filename reader "&readerPath";

/* ---------------------------------------------------------------------- */
/* Step 1: study-level metadata, written as one big comment.              */
/* This step opens the reader file without MOD, so it starts a new file.  */
/* NOTE(review): the step ends as soon as any SET below has no more       */
/* observations. The closing comment marker is therefore only written     */
/* when every table supplies a row - presumably each holds exactly one    */
/* row; confirm against the XMLMap.                                       */
/* ---------------------------------------------------------------------- */
data _null_; /* begin writing metadata into a big comment */
  file reader lrecl=1024;
  length vEdited $ 2000;

  set nostyles.titlstmt;
  put '/* SAS program to read ' agency ' ' IDNo ;
  titl=compbl(tranwrd(translate(titl,' ','09'x),'*/','*_/'));
  put titl;
  altTitl=compbl(tranwrd(translate(altTitl,' ','09'x),'*/','*_/'));
  put altTitl;
  put //;

  set Nostyles.Filetxt;
  %putInCmnt(v=fileName, ve=vEdited, h=File Name:);
  %putInCmnt(v=caseQnty, ve=vEdited, h=Number of Cases:);
  %putInCmnt(v=varQnty, ve=vEdited, h=Number of Variables:);
  %putInCmnt(v=logRecl, ve=vEdited, h=Logical Record Length);
  %putInCmnt(v=recPrCas, ve=vEdited, h=Records Per Case:);
  %putInCmnt(v=recNumTot, ve=vEdited, h=Total Number of Records:);
  %putInCmnt(v=fileType, ve=vEdited, h=File Type:);
  %putInCmnt(v=format, ve=vEdited, h=Format:);
  %putInCmnt(v=dataMsng, ve=vEdited, h=Missing Data:);

  set nostyles.method;
  %putInCmnt(v=timeMeth, ve=vEdited, h=Time method:);
  %putInCmnt(v=dataCollector, ve=vEdited, h=Data Collector:);
  %putInCmnt(v=frequenc, ve=vEdited, h=Frequency:);
  %putInCmnt(v=sampProc, ve=vEdited, h=Sampling Procedure:);
  %putInCmnt(v=collMode, ve=vEdited, h=Collection Mode:);
  %putInCmnt(v=resInstru, ve=vEdited, h=Research Instrument:);
  %putInCmnt(v=weight, ve=vEdited, h=Weight:);

  set Nostyles.Sumdscr;
  %putInCmnt(v=StartDate, ve=vEdited, h=StartDate:);
  %putInCmnt(v=EndDate, ve=vEdited, h=EndDate:);
  %putInCmnt(v=nation, ve=vEdited, h=Nation:);
  %putInCmnt(v=anlyUnit, ve=vEdited, h=Analysis Unit:);
  %putInCmnt(v=Universe, ve=vEdited, h=Universe:);
  %putInCmnt(v=dataKind, ve=vEdited, h=Kind of Data:);

  /* The abstract is written exactly once. It is cleaned in place rather */
  /* than through vEdited so it is not cut to 2000 characters, and       */
  /* "star slash" is neutralised like every other piece of text in this  */
  /* comment.                                                            */
  set Nostyles.abstract;
  abstract=compbl(tranwrd(translate(abstract,' ','09'x),'*/','*_/'));
  if abstract ne ' ' then put / 'Abstract: ' / ' ' abstract;

  put '*/';
run;

/* ---------------------------------------------------------------------- */
/* Step 2: work copies of the variable and category tables.               */
/* safename = name with quote, semicolon, slash, star, ampersand and      */
/* percent characters replaced by underscores.                            */
/* ---------------------------------------------------------------------- */
proc sql;
  create table DATADSCRVARcatgry as
    select translate(name,'_______','''";/*&%') as safename, *
    from NOSTYLES.DATADSCRVARcatgry
    order by name, catValu;

  /* number of categories per variable */
  create table catcounts as
    select name, count(name) as ncats
    from DATADSCRVARcatgry
    group by name;
quit;

proc sql;
  /* varfmt is the generated format name: V, a 5 digit sequence number, f */
  create table DATADSCRVAR1 as
    select "V"||put(monotonic(),z5.)||"f" as varfmt,
           translate(name,'_______','''";/*&%') as safename,
           *
    from NOSTYLES.DATADSCRVAR
    order by recSegNo, startPos;

  /* ncats is missing for variables without labeled categories */
  create table datadscrVar as
    select DATADSCRVAR1.*, catcounts.ncats
    from DATADSCRVAR1 left join catcounts
      on DATADSCRVAR1.name = catcounts.name
    order by recSegNo, startPos;
quit;

/* maps 0-25 to the special missing values .A - .Z */
proc format;
  value mvals
     0='.A'  1='.B'  2='.C'  3='.D'  4='.E'  5='.F'  6='.G'
     7='.H'  8='.I'  9='.J' 10='.K' 11='.L' 12='.M' 13='.N'
    14='.O' 15='.P' 16='.Q' 17='.R' 18='.S' 19='.T' 20='.U'
    21='.V' 22='.W' 23='.X' 24='.Y' 25='.Z';
run;

/* write a PROC FORMAT */
/* cats = one row per category, with the format name and type of its     */
/* variable and, for categories flagged missing, a special missing value */
proc sql;
  create table work.cats as
    select DataDscrVar.varFmt,
           DatadscrVarCatgry.*,
           DataDscrVar.formatType,
           case upcase(missing)
             when 'Y' then put(mod(monotonic(),26),mvals.)
             else ' '
           end as SASmisVal
    from DatadscrVarCatgry, DatadscrVar
    where DatadscrVar.name = DatadscrVarCatgry.name
    order by name, catValu;
quit;

/* ---------------------------------------------------------------------- */
/* Step 3: the generated PROC FORMAT, one VALUE statement per variable.   */
/* ---------------------------------------------------------------------- */
data _null_; /* begin writing the PROC FORMAT */
  set cats;
  by name catValu;
  length fvalue $ 200;
  length cvalue $ 1000;
  file reader lrecl=1024 mod;

  if _n_=1 then do;
    put /// " /* formats for variables with defined response categories */";
    put "proc format;";
  end;

  if first.name then do;
    /* a format applied to a character variable must be a character    */
    /* format, and its name must start with a dollar sign              */
    if upcase(formatType)="NUMERIC" then
      put 'value ' varFmt ' /* format for variable ' safename '*/';
    else
      put 'value $' varFmt ' /* format for variable ' safename '*/';
  end;

  /* the category label, safe to place inside single quotes */
  %pairSingle(vs=labl, vd=fvalue);
  fvalue=compbl(translate(fvalue,' ','09'x));

  if upcase(formatType)="NUMERIC" then do;
    /* only codes that are non-blank and look numeric can be used as   */
    /* ranges in a numeric format                                      */
    if catValu ne ' ' and verify(catValu,'0123456789.- ')=0 then do;
      put catValu "='" fvalue +(-1) "'" ;
      /* the special missing value gets the same label as the code it  */
      /* replaces                                                      */
      if upcase(missing) = 'Y' then
        put SASmisVal "='" fvalue +(-1) "'" ;
    end;
  end;
  else do;
    %pairSingle(vs=catValu, vd=cvalue);
    /* no semicolon here: the VALUE statement is closed once, at       */
    /* last.name                                                       */
    put "'" cvalue "'='" fvalue +(-1) "'" ;
  end;

  if last.name then put ";";
run;

/* ---------------------------------------------------------------------- */
/* Step 4: DATA, INFILE and INPUT statements of the generated DATA step.  */
/* ---------------------------------------------------------------------- */
data _null_; /* begin writing datastep to read the variables */
  set DATADSCRVAR end=last;
  file reader lrecl=1024 mod;
  length ftype $ 3;

  if _n_=1 then do;
    /* file-level and title-level values are read once, with the first */
    /* variable                                                        */
    set Nostyles.Filetxt;
    set Nostyles.titlstmt;
    put ///;
    /* remove unsafe characters */
    agency=translate(agency,'_______','''";/*&%');
    IDNo=translate(IDNo,'_______','''";/*&%');
    logrecl=translate(logrecl,'_______','''";/*&%');
    put 'data ' agency +(-1) IDNo ';';
    put "infile '&targetPath' LRECL=" logrecl " PAD;";
    put "input ";
  end;

  /* remove unsafe characters from the position fields of EVERY        */
  /* variable, not just the first one (these are handled as character  */
  /* values, as the blank comparisons below already assume)            */
  RecSegNo=translate(RecSegNo,'_______','''";/*&%');
  StartPos=translate(StartPos,'_______','''";/*&%');
  EndPos=translate(EndPos,'_______','''";/*&%');

  /* position of each variable */
  if RecSegNo ne ' ' and StartPos ne ' ' and EndPos ne ' ' then do;
    /* anything not declared NUMERIC is read as character */
    if upcase(formatType) = 'NUMERIC' then ftype = ' ';
    else ftype = ' $ ';
    put "#" RecSegNo safename ftype StartPos +(-1) "-" EndPos ;
  end;

  if last then put ";";
run;

/* ---------------------------------------------------------------------- */
/* Step 5: recode category codes flagged as missing to special missing    */
/* values. The generated statements use safename, the name the INPUT      */
/* statement created, and are written only for codes that are non-blank   */
/* and look numeric (the same test the format writer uses).               */
/* ---------------------------------------------------------------------- */
/* assign missing values */
data _null_;
  set cats;
  by name catValu;
  file reader lrecl=1024 mod;

  if _n_=1 then
    put '/* replace missing data with unique SAS missing values */ ';

  if upcase(missing)='Y' and upcase(formatType) = 'NUMERIC'
     and catValu ne ' ' and verify(catValu,'0123456789.- ')=0 then
    put ' if ' safename ' = ' catValu ' then ' safename ' = ' SASmisVal ';';
run;

/* ---------------------------------------------------------------------- */
/* Step 6: LABEL statements. The label is labl when present, otherwise    */
/* qstnLit.                                                               */
/* ---------------------------------------------------------------------- */
/* variable labels */
data _null_; /* write a label for the variables with something to use as a label */
  set DATADSCRVAR;
  length vlabel1 $ 256;
  /* twice the size of vlabel1, so doubling every quote can never be cut */
  /* off in the middle of a doubled pair                                 */
  length vlabel $ 512;
  file reader lrecl=1024 mod;

  if _n_=1 then put ///;

  if labl ne ' ' then
    vlabel1=compbl(translate(labl,' ','09'x));
  else if qstnLit ne ' ' then
    vlabel1=compbl(translate(qstnLit,' ','09'x));

  %pairSingle(vs=vlabel1, vd=vlabel);

  if vlabel ne ' ' then
    put ' label ' safename '=' "'" vlabel "';";
run;

/* ---------------------------------------------------------------------- */
/* Step 7: FORMAT statements for variables that have labeled categories.  */
/* ---------------------------------------------------------------------- */
/* variable formats */
data _null_; /* write a format for each variables with categories labeled */
  set DATADSCRVAR;
  where ncats ne . ;
  file reader lrecl=1024 mod;

  if _n_=1 then do;
    put /// '/* This section will associate formats with each variable that has labeled categories */';
    put '/* you may want to comment it out. */';
  end;

  /* character variables use the dollar-prefixed format written by the */
  /* PROC FORMAT step                                                  */
  if upcase(formatType) = 'NUMERIC' then
    put ' format ' safename ' ' varfmt +(-1) '.;' ;
  else
    put ' format ' safename ' $' varfmt +(-1) '.;' ;
run;

/* ---------------------------------------------------------------------- */
/* Step 8: close the generated DATA step.                                 */
/* ---------------------------------------------------------------------- */
data _null_; /* END the DATASTEP */
  file reader lrecl=1024 mod;
  put 'run;';
run;