/* generateReaderForICPSR2825.sas - write a SAS program that reads ICPSR study 2825 */ /* */ /* example for SUGI30 paper */ /* Larry Hoyle Sept, 2004 */ %let targetPath=D:\data\icpsr\data\2825\da2825.txt; %let readerPath=D:\projects\sugs\sugi30\paper\sascode\DataFileReaders\readDA2825.sas; *'; /* * ENVIRONMENT */ filename DDIfile 'D:\data\icpsr\DDI\examples\ICPSR2825\02825.xml'; filename DDImap 'D:\projects\sugs\sugi30\paper\sascode\XMLmaps\DDI.map'; libname DDIfile xml xmlmap=DDImap access=READONLY; /* * CATALOG proc datasets lib=DDIfile; run; */ /* * Resultant File layouts */ /* proc contents data=DDIfile.stdyDscr_citation_titlStmt varnum; run; proc contents data=DDIfile.stdyDscr_stdyInfo_sumDscr varnum; run; proc contents data=DDIfile.stdyDscr_method_dataColl varnum; run; proc contents data=DDIfile.stdyDscr_citation_prodStmt; run; proc contents data=DDIfile.stdyDscr_citation_biblCit; run; proc contents data=DDIfile.fileDscr_fileTxt varnum; run; proc contents data=DDIfile.DataDscr_Var varnum; run; proc contents data=DDIfile.DataDscr_Var_Valrng_range varnum; run; proc contents data=DDIfile.DataDscr_Var_Invalrng_range varnum; run; proc contents data=DDIfile.DataDscr_Var_Catgry varnum; run; proc contents data=DDIfile.DataDscr_varGrp varnum; run; */ /* proc print data=DDIfile.stdyDscr_citation_titlStmt; run; proc print data=DDIfile.stdyDscr_stdyInfo_sumDscr; run; proc print data=DDIfile.stdyDscr_method_dataColl; run; proc print data=DDIfile.stdyDscr_citation_prodStmt; run; proc print data=DDIfile.stdyDscr_citation_biblCit; run; proc print data=DDIfile.fileDscr_fileTxt; run; proc print data=DDIfile.DataDscr_Var; run; proc print data=DDIfile.DataDscr_Var_Valrng_range; run; proc print data=DDIfile.DataDscr_Var_Invalrng_range; run; proc print data=DDIfile.DataDscr_Var_Catgry; run; proc print data=DDIfile.DataDscr_varGrp; run; */ /* _________________________________________________________________ */ /* generate the PUT statements for the metadata comment section */ /* tabs are turned to blanks, successive blanks are replaced by one */ /* "star slash" is turned to star underscore slash" */ /* _________________________________________________________________ */ %macro putInCmnt(v=name, ve=vEdited, h=heading); &ve=compbl(tranwrd(translate(&v,' ','09'x),'*/','*_/')); if &ve ne ' ' then do; &ve=vname(&v)||" - "||vlabel(&v); put / "&h (" &ve ")" ; &ve=compbl(tranwrd(translate(&v,' ','09'x),'*/','*_/')); put ' ' &ve; end; %mend putInCmnt; /* macro for numeric variables */ %macro putNumericInCmnt(v=name, ve=vEdited, h=heading); if &v ne . then do; &ve=vname(&v)||" - "||vlabel(&v); put / "&h (" &ve ")" ; put ' ' &v; end; %mend putNumericInCmnt; /* for strings to be written in single quotes any embedded */ /* single quotes must be doubled */ %macro pairSingle(vs=a, vd=b); rxsingleq=rxparse("$""'"" TO ""''"""); call rxchange(rxsingleq,9999,&vs,&vd); %mend pairSingle; /* Some Elements are in their own dataset - generate a DATA Step to */ /* write each row to the metadata comment section */ %macro DatasetToCmnt(ds=, v=); %IF &ds ne %THEN %DO; data _null_; set DDIfile.&ds end=last nobs=nrows; file reader lrecl=1024 mod ; length vNameEdited $ 2000; length vEdited $ 2000; if nrows>0 then do; if _n_=1 then do; /* print variable description */ vNameEdited=vname(&v)||": - "||vlabel(&v); put / vNameEdited ; end; /* print each value */ vEdited=compbl(tranwrd(translate(&v,' ','09'x),'*/','*_/')); if vEdited ne ' ' then do; put ' ' vEdited; end; end; run; %END; %mend DatasetToCmnt; /* ____________________________________________ */ /* What kind of record groups are present? */ /* associate the rectype field with the */ /* and the position of the recIdVar variable */ /* with the other information about the group */ /* ____________________________________________ */ proc sql noprint; /* find a unique category for each record Group ID variable */ /* each should have only one category anyway */ create table recGrpCats as select varID,max(catValu) as catValu from DDIfile.DATADSCR_VAR_CATGRY, DDIfile.Filedscr_filetxt_recgrp where Filedscr_filetxt_recgrp.recgrpRecIdVar=DATADSCR_VAR_CATGRY.varID group by varID; /* find the rectype and location of the RecordGroup Id variable */ create table RG1 as select Filedscr_filetxt_recgrp.*, DATADSCR_VAR.rectype, DATADSCR_VAR.StartPos, DATADSCR_VAR.EndPos, DATADSCR_VAR.width, DATADSCR_VAR.RecSegNo from DDIfile.Filedscr_filetxt_recgrp, DDIfile.DATADSCR_VAR where DATADSCR_VAR.name = Filedscr_filetxt_recgrp.recgrpRecIdVar; /* find the value the identifies teh group */ create table RecordGroups as select RG1.*, recGrpCats.catValu from rg1,recGrpCats where recGrpCats.varID=RG1.recgrpRecIdVar; /* ____________________________________________ */ /* Find the distinct positions of recidvars */ /* ____________________________________________ */ create table UniqueRecordGroupIDs as select distinct 'RG'||trim(left(put(StartPos,10.)))||"_"||trim(left(case when EndPos=. then put(StartPos+width-1,10.) else put(Endpos,10.) end)) as RecordGroupSet, StartPos as RGSStartPos, case when EndPos=. then StartPos+width-1 else EndPos end as RGSEndPos, RecSegNo as RGSRecSegNo from recordGroups; /* _____________________________________________ */ /* count them */ /* this version will deal with only one position */ /* _____________________________________________ */ select trim(left(put(count(RecordGroupSet),10.))) into :NRecordGroupSets from UniqueRecordGroupIDs; select trim(left(put(coalesce(maxRecGrpLogRecl,maxFiletxtLogRecl),10.))) into :maxLogRecl from (select max(recGrpLogRecl) as maxRecGrpLogRecl from recordgroups group by recGrpID) full join (select max(FILEDSCR_FILETXT.logRecl) as maxFiletxtLogRecl from DDIfile.FILEDSCR_FILETXT) on 1=1; quit; %put Number of unique record group ID positions = &NRecordGroupSets; %put Largest Logical Record Length = &maxLogRecl; /* place the file structure into a macro variable */ /* a later version of this code will read Hierarchical files */ proc sql noprint; select lowcase(fileDscr_filetxt.fileStrcType) into :fileStructureType from DDIfile.fileDscr_filetxt; quit; %put &fileStructureType; filename reader "&readerPath"; /* __________________________________________ */ /* begin writing metadata into a big comment */ /* __________________________________________ */ data _null_; file reader lrecl=1024 ; length vEdited $ 2000; set DDIfile.stdyDscr_citation_titlStmt; if _n_=1 then put '/*' / ' SAS program to read ' agency ' ' IDNo ; stdyDscrTitl= compbl(tranwrd(translate(stdyDscrTitl,' ','09'x),'*/','*_/')); put 'Study Title' _n_ ': ' stdyDscrTitl; altTitl=compbl(tranwrd(translate(altTitl,' ','09'x),'*/','*_/')); put ' ' altTitl; put //; set DDIfile.fileDscr_fileTxt; %putInCmnt(v=fileName, ve=vEdited, h=File Name:); %putInCmnt(v=fileCont, ve=vEdited, h=File Contents:); %putInCmnt(v=fileStrcType, ve=vEdited, h=File Structure type:); %putNumericInCmnt(v=caseQnty, ve=vEdited, h=Number of Cases:); %putNumericInCmnt(v=varQnty, ve=vEdited, h=Number of Variables:); %putNumericInCmnt(v=logRecl, ve=vEdited, h=Logical Record Length); %putNumericInCmnt(v=recPrCas, ve=vEdited, h=Records Per Case:); %putNumericInCmnt(v=recNumTot, ve=vEdited, h=Total Number of Records:); %putInCmnt(v=fileType, ve=vEdited, h=File Type:); %putInCmnt(v=format, ve=vEdited, h=Format:); %putInCmnt(v=dataChck, ve=vEdited, h=Data checks:); %putInCmnt(v=dataMsng, ve=vEdited, h=Missing Data:); run; data _null_; /* Documentation of the documentation */ file reader lrecl=1024 mod ; put '_______________________________'; put '| About the Data Documentation |'; put '_______________________________'; run; %DatasetToCmnt(ds=docDscr_citation__titl, v=docDscrCitationTitl); %DatasetToCmnt(ds=docDscr_citation__IDNo, v=docDscrCitationIDNo); %DatasetToCmnt(ds=docDscr_citation__AuthEnty, v=docDscrCitationAuthEnty); %DatasetToCmnt(ds=docDscr_citation__producer, v=docDscrCitationProducer); %DatasetToCmnt(ds=docDscr_citation__prodDate, v=docDscrCitationProdDate); %DatasetToCmnt(ds=docDscr_citation__othID, v=docDscrCitationOthID); %DatasetToCmnt(ds=docDscr_citation__copyright, v=docDscrCitationCopyright); data _null_; /* Documentation of the Study */ file reader lrecl=1024 mod ; put '_____________________________'; put '| About the Study |'; put '_____________________________'; run; %DatasetToCmnt(ds=stdyDscr_citation_titlStmt, v=stdyDscrTitl); %DatasetToCmnt(ds=stdyDscr_citation_titlStmt, v=subTitl); %DatasetToCmnt(ds=stdyDscr_citation_titlStmt, v=altTitl); %DatasetToCmnt(ds=stdyDscr_citation_titlStmt, v=IDNo); %DatasetToCmnt(ds=stdyDscr_citation_titlStmt, v=agency); %DatasetToCmnt(ds=stdyDscr_citation__authEnty, v=stdyDscrCitationAuthEnty); %DatasetToCmnt(ds=stdyDscr_stdyInfo__keyword, v=stdyDscrStdyInfoKeyword); %DatasetToCmnt(ds=stdyDscr_stdyInfo__topcClas, v=stdyDscrStdyInfoTopcClas); %DatasetToCmnt(ds=stdyDscr_method__timeMeth, v=stdyDscrMethodTimeMeth); %DatasetToCmnt(ds=stdyDscr_method__dataCollector, v=stdyDscrMethodDataCollector); %DatasetToCmnt(ds=stdyDscr_method__frequenc, v=stdyDscrMethodFrequenc); %DatasetToCmnt(ds=stdyDscr_method__sampProc, v=stdyDscrMethodSampProc); %DatasetToCmnt(ds=stdyDscr_method__collMode, v=stdyDscrMethodCollMode); %DatasetToCmnt(ds=stdyDscr_method__resInstru, v=stdyDscrMethodResInstru); %DatasetToCmnt(ds=stdyDscr_method__weight, v=stdyDscrMethodWeight); %DatasetToCmnt(ds=stdyDscr_citation__producer, v=stdyDscrProducer); %DatasetToCmnt(ds=stdyDscr_citation__producer, v=stdyDscrProducerAffiliation); %DatasetToCmnt(ds=stdyDscr_citation__producer, v=stdyDscrProducerRole); %DatasetToCmnt(ds=stdyDscr_citation__prodDate, v=stdyDscrProdDate); %DatasetToCmnt(ds=stdyDscr_citation__copyright, v=stdyDscrCopyright); %DatasetToCmnt(ds=stdyDscr_citation__software, v=stdyDscrSoftware); %DatasetToCmnt(ds=stdyDscr_citation__fundAg, v=stdyDscrFundAg); %DatasetToCmnt(ds=stdyDscr_citation__grantNo, v=stdyDscrGrantNo); %DatasetToCmnt(ds=stdyDscr_citation__distrbtr, v=stdyDscrDistrbtr); %DatasetToCmnt(ds=stdyDscr_citation_verStmt, v=stdyDscrVersion); %DatasetToCmnt(ds=stdyDscr_stdyInfo__CollDate, v=stdyDscrCollDate); %DatasetToCmnt(ds=stdyDscr_stdyInfo__nation, v=stdyDscrNation); %DatasetToCmnt(ds=stdyDscr_stdyInfo__geogCover, v=stdyDscrGeogCover); %DatasetToCmnt(ds=stdyDscr_stdyInfo__timePrd, v=stdyDscrTimePrd); %DatasetToCmnt(ds=stdyDscr_stdyInfo__timePrd, v=stdyDscrTimePrdStart); %DatasetToCmnt(ds=stdyDscr_stdyInfo__timePrd, v=stdyDscrTimePrdEnd); %DatasetToCmnt(ds=stdyDscr_stdyInfo__timePrd, v=stdyDscrTimePrdSingle); %DatasetToCmnt(ds=stdyDscr_stdyInfo__anlyUnit, v=stdyDscrAnlyUnit); %DatasetToCmnt(ds=stdyDscr_stdyInfo__universe, v=stdyDscrUniverse); %DatasetToCmnt(ds=stdyDscr_stdyInfo__dataKind, v=stdyDscrDataKind); %DatasetToCmnt(ds=stdyDscr_stdyInfo_abstract,v=stdyDscrStdyInfoAbstract); data _null_; /* The bibliographic citation*/ file reader lrecl=1024 mod; length vEdited $ 2000; set DDIfile.stdyDscr_citation_biblCit; %putInCmnt(v=biblCit, ve=vEdited, h=Complete bibliographic reference:); %putInCmnt(v=biblCitFormat, ve=vEdited, h=Bibliographic reference format:); run; data _null_; file reader lrecl=1024 mod ; put '*/'; run; /* ________________________________________ */ /* write varGrp metadata into a big comment */ /* ________________________________________ */ data _null_; set DDIfile.DataDscr_varGrp end=last; file reader lrecl=1024 mod ; length vEdited $ 2000; if _n_=1 then do; put // '/* '; put '___________________________________________'; put '| Variables fall into the following groups |'; put '____________________________________________'; end; put ; %putInCmnt(v=ID, ve=vEdited, h=Group Unique ID:); %putInCmnt(v=labl, ve=vEdited, h=...Group label:); %putInCmnt(v=name, ve=vEdited, h=...Group name:); %putInCmnt(v=var, ve=vEdited, h=...Variables in the group:); %putInCmnt(v=varGrp, ve=vEdited, h=...Subgroups in the group:); %putInCmnt(v=txt, ve=vEdited, h=...Extended description:); %putInCmnt(v=concept, ve=vEdited, h=...Group concept:); %putInCmnt(v=defntn, ve=vEdited, h=...Group Rationale:); %putInCmnt(v=universe, ve=vEdited, h=...Group universe:); %putInCmnt(v=notes, ve=vEdited, h=...Group notes:); %putInCmnt(v=type, ve=vEdited, h=...Group type:); %putInCmnt(v=sdatarefs, ve=vEdited, h=ID values for elements within the summary data description:); %putInCmnt(v=methrefs, ve=vEdited, h=ID values for elements within the study methodology and processing section:); %putInCmnt(v=pubrefs, ve=vEdited, h=ID values for elements within the publication and citation sections:); %putInCmnt(v=access, ve=vEdited, h=ID values for elements within the data use section:); if last then do; put // ' */'; end; run; /* ________________________________________ */ /* write recGrp metadata into a big comment */ /* ________________________________________ */ data _null_; set RecordGroups end=last; file reader lrecl=1024 mod ; length vEdited $ 2000; if "&fileStructureType" eq "hierarchical" then do; if _n_=1 then do; put // '/* '; put '___________________________________________________________'; put '| This hierarchical file has the following record groups |'; put '___________________________________________________________'; if "&NRecordGroupSets" ne "1" then put / "WARNING: The number of distinct locations for the" / " record group ID variable is " "'&NRecordGroupSets'. The logic of this code will need to be modified to read the file correctly."; end; put ; %putInCmnt(v=recGrpID, ve=vEdited, h=Record Group ID); %putInCmnt(v=recGrpSubRecGrp, ve=vEdited, h=...Record Groups within this group); %putInCmnt(v=recGrpRectype, ve=vEdited, h=...Type of record listed for record group); %putInCmnt(v=recGrpKeyvar, ve=vEdited, h=...Link to other record types ); %putInCmnt(v=recGrpRtypeloc, ve=vEdited, h=...Record type location); %putNumericInCmnt(v=recGrpRtypewidth, ve=vEdited, h=...Record type variable width); %putInCmnt(v=recGrpRtypevtype, ve=vEdited, h=...Record type variable type); %putInCmnt(v=recGrpRecidvar, ve=vEdited, h=...Record type ID variable); %putInCmnt(v=recGrpLabl, ve=vEdited, h=...Label for record group ); %putNumericInCmnt(v=recGrpVarQnty, ve=vEdited, h=...Number of variables in record group); %putNumericInCmnt(v=recGrpCaseQnty, ve=vEdited, h=...Number of cases in record group); %putNumericInCmnt(v=recGrpLogRecl, ve=vEdited, h=...Logical record length for record group); %putInCmnt(v=rectype, ve=vEdited, h=.....Record ID variable record type); %putNumericInCmnt(v=StartPos, ve=vEdited, h=.....Record ID variable starting position); %putNumericInCmnt(v=EndPos, ve=vEdited, h=.....Record ID variable ending position); %putNumericInCmnt(v=width, ve=vEdited, h=.....Record ID variable width); %putNumericInCmnt(v=RecSegNo, ve=vEdited, h=.....Record ID variable record segment number); if last then do; put // ' */'; end; end; run; /* Add a SAS variable name to a sorted list of categories */ proc sql; create table DataDscr_Var_Catgry as select translate(varName,'_________','''";/*&%-.') as safename,* from DDIfile.DataDscr_Var_Catgry order by varName,catValu; create table catcounts as select varName, count(varName) as ncats from DataDscr_Var_Catgry group by varName; quit; /* Add the same variable name to the sorted list of variables */ proc sql; create table DataDscr_VAR1 as select "V"||put(monotonic(),z5.)||"f" as varfmt, translate(name,'_________','''";/*&%-.') as safename, * from DDIfile.DataDscr_VAR order by recSegNo,startPos; /* Add a count of categories to the variable list */ create table DataDscr_VAR2 as select DataDscr_VAR1.*, catcounts.ncats from DataDscr_VAR1 left join catcounts on DataDscr_VAR1.name= catcounts.varName order by rectype,recSegNo,startPos; /* Add the record group identifying value to the variable list */ create table DataDscr_VAR as select DataDscr_VAR2.*, Recordgroups.catValu,Recordgroups.rectype as RecGrpRectype from DataDscr_VAR2 left join RecordGroups on DataDscr_VAR2.rectype=RecordGroups.rectype order by rectype,recSegNo,startPos; quit; proc format; value mvals 0='.A' 1='.B' 2='.C' 3='.D' 4='.E' 5='.F' 6='.G' 7='.H' 8='.I' 9='.J' 10='.K' 11='.L' 12='.M' 13='.N' 14='.O' 15='.P' 16='.Q' 17='.R' 18='.S' 19='.T' 20='.U' 21='.V' 22='.W' 23='.X' 24='.Y' 25='.Z'; /* ________________________________________ */ /* create files used to write a PROC FORMAT */ /* ________________________________________ */ proc sql; create table work.cats as select varFmt as FMTNAME, case when upcase(DataDscr_Var.varFormatType) = 'NUMERIC' then DataDscr_Var.varFmt else '$'||DataDscr_Var.varFmt end as varFmt, DataDscr_Var_Catgry.*, DataDscr_Var.varFormatType, case when width > 0 then width else DataDscr_Var.EndPos-DataDscr_Var.StartPos+1 end as Default label='Default length', case upcase(missing) when 'Y' then put(mod(monotonic(),26),mvals.) else ' ' end as SASmisVal from DataDscr_Var_Catgry , DataDscr_Var where DataDscr_Var.name=DataDscr_Var_Catgry.varName order by varName, catValu; /* create a cntlin file to define formats */ create table WORK.MakeFormats as select FMTNAME label='Format name', case when upcase(varFormatType) = 'NUMERIC' then 'N' else 'C' end as Type label='Format Type', catValu as START label='Starting value for format', labl as LABEL label='Format value label', DEFAULT label='Default length' from work.cats where labl ne ' ' order by fmtname,start; /* other possible fields for a cntlin file END char(16) label='Ending value for format', range_MIN num label='Minimum length', range_MAX num label='Maximum length', LENGTH num label='Format length', TYPE char(1) label='Type of format', HLO char(11) label='Additional information', */ ; /* count the maximum number of labels per start value */ /* this will be used to warn about duplicates */ create table maxRepetitions as select fmtname, max(nRepetitions) as maxRepetitions from (select fmtname, count(label) as nRepetitions from MakeFormats group by fmtname,start) as nRepeats group by fmtname order by fmtname; quit; /* __________________________________________________ */ /* write a datastep to create the cntlin file inline */ /* __________________________________________________ */ %let maxLblLen=512; data _null_; file reader lrecl=1024 mod; set makeFormats end=last; by fmtname start; length shortLabel $ &maxLblLen; length lastLabel $ &maxLblLen; retain nOccur 0; if first.start then nOccur=0; if _n_=1 then do; put // 'data makeTheFormats;'; put " input fmtname $ 1-7 type $ 9-9 start $ 11-26 default 28-35 / label :&$&maxLblLen..;"; put 'datalines;'; end; /* concatenate duplicate labels */ nOccur=nOccur+1; /* tabs to blanks and */ shortLabel=compbl(translate(Label,',',';',' ','09'x)); /* cannot have a ; in the dataset */ if nOccur gt 1 then shortLabel=trim(lastLabel)||' _OR_ '||shortLabel; lastLabel=shortLabel; if last.start then do; if nOccur gt 1 then shortLabel="_EITHER_ " || trim(shortLabel) || ' _OR_ ' || shortLabel; put @1 fmtname $7. @9 type $1. @11 start $16. @28 default 8. / shortLabel; end; if last then do; put ';' / 'run;' //; end; run; /* *********************** */ /* *********************** */ /* ______________________________________________ */ /* write a PROC FORMAT that uses the cntlin file */ /* ______________________________________________ */ data _null_; file reader lrecl=1024 mod; put "proc format cntlin=makeTheFormats;"; put 'run;'; run; /* ______________________________________________ */ /* Begin writing a datastep to read the variables */ /* ______________________________________________ */ /* write out the data statement */ /* one file if rectangular */ /* multiple files with keep list if hierarchical */ data _null_; set datadscr_var end=last; by rectype; file reader lrecl=1024 mod; if _n_=1 then do; set DDIfile.fileDscr_fileTxt; set DDIfile.stdyDscr_citation_titlStmt; if &NRecordGroupSets ge 1 then set UniqueRecordGroupIDs; retain RecordGroupSet RGSStartPos RGSEndPos RGSRecSegNo; length safeAgency $ 8; length safeIDno $ 8; retain safeIDNo safeAgency; put '/* _______________________________________'; put ' | This DataStep reads the data file |'; put ' _______________________________________*/'; put ///; if &NRecordGroupSets gt 1 then put // '%macro FixTheLogicThatFollows; /* Manually modify the logic below for reading the hierarchy */' //; /* remove unsafe characters */ safeAgency=translate(agency,'_________','''";/*&%-.'); safeIDNo=translate(IDNo,'_________','''";/*&%-.'); put 'data ' @; end; if first.rectype then do; put safeAgency +(-1) safeIDNo +(-1) rectype ' (keep = ' /@; end; put ' ' safename; if last.rectype then do; put ' ) '; end; if last then do; put ';'; put / "infile '&targetPath' LRECL=&maxLogRecl PAD;"; if &NRecordGroupSets gt 0 then do; put ' /* read the record group identifier */'; put "input #" RGSRecSegNo " _RecordSetIdentifier $ " RGSStartPos '- ' RGSEndPos ' @;' ; end; end; run; data _null_; set DataDscr_VAR end=last; by rectype; file reader lrecl=1024 mod; length ftype $ 3; if _n_=1 then do; set DDIfile.fileDscr_fileTxt; set DDIfile.stdyDscr_citation_titlStmt; /* remove unsafe characters */ safeAgency=translate(agency,'_________','''";/*&%-.'); safeIDNo=translate(IDNo,'_________','''";/*&%-.'); end; retain safeAgency safeIDno; if first.rectype then do; if "&fileStructureType" eq "hierarchical" then do; put // 'if left(_RecordSetIdentifier) eq left("' catValu '") then ' ; end; put 'input '; end; /* position of each variable */ if RecSegNo = . then RecSegNo=1; /* positions described with start and end */ if RecSegNo ne . and StartPos ne . and EndPos ne . then do; if upcase(varFormatType) = 'NUMERIC' then ftype = ' '; else ftype = ' $ '; put "#" RecSegNo safename ftype StartPos +(-1) "-" EndPos @; if upcase(varFormatType) = 'NUMERIC' and dcml ne . then put ' .' dcml @; if upcase(varFormatType) NE 'NUMERIC' and dcml ne . then put ' /* NOTE: Character variable with ' dcml ' implied decimal places */'; else put; end; /* positions described with start and width */ else if RecSegNo ne . and StartPos ne . and width ne . then do; EndPos=StartPos+width-1; if upcase(varFormatType) = 'NUMERIC' then ftype = ' '; else ftype = ' $ '; put "#" RecSegNo safename ftype StartPos +(-1) "-" EndPos @; if upcase(varFormatType) = 'NUMERIC' and dcml ne . then put ' .' dcml @; if upcase(varFormatType) NE 'NUMERIC' and dcml ne . then put ' /* NOTE: Character variable with ' dcml ' implied decimal places */'; else put; end; /* positions described with start only (free-format) */ else if RecSegNo ne . and StartPos ne . and width eq . and EndPos eq . then do; if upcase(varFormatType) = 'NUMERIC' then ftype = ' '; else ftype = ' $ '; put "#" RecSegNo safename ftype @; if dcml ne . then put ' /* NOTE: variable has ' dcml ' implied decimal places */'; else put; end; if last.rectype then do; put ";"; end; run; /* assign missing values */ data _null_; set cats end=last; by varName catValu; file reader lrecl=1024 mod; if _n_=1 then do; put '/* replace missing data with unique SAS missing values */ '; end; if upcase(varFormatType) = 'NUMERIC' then do; if upcase(missing)='Y' then put ' if ' safename ' = ' catValu ' then ' safename ' = ' SASmisVal ';'; end; else do; if upcase(missing)='Y' then put ' if ' safename ' = ''' catValu +(-1) ''' then ' safename ' = ''_documented as invalid_'';'; end; run; /* short variable labels, commented out */ data _null_; set DataDscr_VAR end=last; length vlabel1 $ 256; length vlabel $ 256; file reader lrecl=1024 mod; if _n_=1 then do; put /// '/* short variable labels'; end; if labl ne ' ' then vlabel1=compbl(translate(labl,' ','09'x)); %pairsingle(vs=vlabel1, vd=vlabel); if vlabel ne ' ' then put ' label ' safename '=' "'" vlabel "';"; if last then put '*/'; run; /* long variable labels */ data _null_; set DataDscr_VAR end=last; length vlabel1 $ 256; length vlabel $ 256; file reader lrecl=1024 mod; if _n_=1 then do; put /// '/* long variable labels */'; end; if txt ne ' ' then vlabel1=compbl(translate(txt,' ','09'x)); %pairsingle(vs=vlabel1, vd=vlabel); if vlabel ne ' ' then put ' label ' safename '=' "'" vlabel "';"; run; /* variable formats */ /* some variables may have only blank format labels listed */ /* no need to list them here */ proc sql; create table FormattedVars as select distinct DataDscr_VAR.ncats, DataDscr_VAR.varfmt, DataDscr_VAR.varFormatType, DataDscr_VAR.safename, maxRepetitions.maxRepetitions from DataDscr_VAR,makeFormats,maxRepetitions where makeFormats.fmtname=DataDscr_VAR.varfmt and makeFormats.fmtname=maxRepetitions.fmtname and ncats > 0 order by safename,varfmt; quit; data _null_; length varfmt $ 9; /* write a format for each variables with categories labeled */ set FormattedVars end=last; file reader lrecl=1024 mod; if upcase(varFormatType) NE 'NUMERIC' then varfmt='$'||varfmt; if _n_=1 then do; put /// '/* This section will associate formats with each variable that has labeled categories */'; put '/* you may want to uncomment it . */'; end; put ' format ' safename ' ' varfmt +(-1) '.;' @; if maxRepetitions>1 then put ' /* NOTE: This format had more than one label for a given value */'; else put; /* if last then put '*' '/'; */ run; data _null_; if &NRecordGroupSets GE 1 then do; set recordgroups end=last; if _n_=1 then do; set DDIfile.fileDscr_fileTxt (obs=1); set DDIfile.stdyDscr_citation_titlStmt (obs=1); end; end; else do; set DDIfile.fileDscr_fileTxt (obs=1); set DDIfile.stdyDscr_citation_titlStmt (obs=1); end; /* END the DATASTEP */ file reader lrecl=1024 mod; /* remove unsafe characters */ safeAgency=translate(agency,'_________','''";/*&%-.'); safeIDNo=translate(IDNo,'_________','''";/*&%-.'); if _n_=1 then put / ; retain safeAgency safeIDno; if "&fileStructureType" eq "hierarchical" then do; put 'if left(_RecordSetIdentifier) eq left("' catValu '") then DO;' / " output " safeAgency +(-1) safeIDNo +(-1) rectype ';' / 'END;' //; end; if last then do; put 'run; /* and cross your fingers */'; if &NRecordGroupSets GT 1 then put '%mend FixTheLogicThatFollows; /* Manually modify the logic above for reading the hierarchy */'; put // '%put' " &readerpath. has finished reading:;"; put // '%put' " &targetpath.;"; end; run; %put all done;