/* SAS2DDI3.sas - Output a SAS dataset as a DDI3 XML file */ /* Larry Hoyle August 2007 */ /* First Draft Version August 17, 2007*/ /* August 22, 2007 14:40 revision */ /* August 30, 2007 16:25 revision */ /* January 15, 2008 13:30 revision - added capurint integrity constraints */ /* February 25, 2008 CR3 version changes */ /* March 6, 2008 revised to deal with missing integrity constraints */ /* March 9, 2008 StudyUnit for CNTLOUT dataset added */ /* March 12, 2008 modify insert into IC1 to deal with the differing number of columns in ic0 */ /* depending on the existence of foreign key references */ /* May 2008 - modified for the retified version of DDI 3.0 */ /* Metadata that can be extracted from the selected SAS dataset are written to the DDI file automatically */ /* Other metadata can be entered as arguments to the macro "SAS2DDI" at the bottom of the file */ /* This version could be revised to allow a more flexible form of input into the macro variables that define */ /* element content. */ /* When entering text into the %NRSTR() function the following note from the SAS documentation applies: */ /* "Note: If an unmatched single or double quotation mark or a left or right parenthesis is used */ /* with %STR or %NRSTR, */ /* these characters must be preceded by a percent sign (%)." */ /* With globalization of the macro variables this could easily be turned into a SAS Application Dispatcher service */ /* If you want to validate the XML output by this program you should revise the "schemaLoc" */ /* macro variable to point to the instance.xsd file you have downloaded (along with the other schema files) */ /* While the DDI 3 schema allows for XHMTML content in some elements, in this program all */ /* element content is escaped by the macro Xescape. A "<", for example, is rendered as "<" */ /* This will preserve the content but will inactivate any scripts in the XHTML */ /* Generate the TEST DATASET */ /* SAS2DDI3_data.sas - generate test data for SAS to DDI3 conversion program */ /* Larry Hoyle August 2007 */ proc format; /* there should be at least one user format defined so that the */ /* cntlout / cntlin dataset will exist formats not associated with a variable will be ignored */ /* Unless you define your own format "one." leaving this one here would be good */ value one 1 = 'One'; invalue sexin (just upcase) 'M' = 1 'MALE' = 1 'F' = 2 'FEMALE' = 2; ; value sexAll 0 = 'Young Male' 1 = 'Adult Male' 2 = 'Young Female' 3 = 'Adult Female' ; value sex 0 = 'Male' 1 = 'Male' 2 = 'Female' 3 = 'Female' ; /* format BMI is not used and is here to be ignored later */ value BMI low-<18.5 = "Underweight" 18.5-24.9 = "Normal weight" 25-29.9 = "Overweight" 30-high = "Obesity"; value $stext 'm','M','MALE','Male' = 'Boy' 'f','F','FEMALE','Female' = 'Girl'; /* format to test a range of numbers */ value avocadoNumber low-<0 = 'avocados owed' 1 = 'lonley avocado' 1<-6.02214149e23 = 'too few avocados' 6.02214149e23-6.02214209e23 = 'guaca mole' 6.02214209e23<-high = 'a party'; run; data mySASdata(label='Test Data for SAS to DDI 3 program'); length ID 8 name $ 30 avocado 8 sex 8 percentTime 8 DOB 8 TOB 8 entryDT 8; label ID = 'Identification Number'; label name = 'Respondant''s Name'; label avocado = 'Number of avacados'; label sex = 'Respondant''s Gender'; label percentTime = 'Percent of time counting Avacados'; label fee = 'Fee in Euros'; label entryDT = 'Date data entered'; label DOB = 'Date of birth'; label TOB = 'Time of Birth'; input id @4 sex sexin. @12 avocado percentTime fee DOB :date. TOB :time. name $ 49-78 ; entryDT=datetime(); format avocado avocadoNumber.; format sex sex.; format percentTime percent8.1; format fee EUROX10.2; format DOB IS8601DA.; format TOB IS8601TM.; format entryDT IS8601DT.; datalines; 1 M 1 .1 11000 29feb52 11:49 Joe Schmo 2 M 6.021e23 .2 10000 01jan72 14:01 Bill Hill 3 F 6.02214139e23 .3 9000 08jun85 05:25 Donna O'Fauna 4 M 6.02214149e23 .4 8000 25dec64 01:23 Rob "Bob" Cobb 5 m 6.02214159e23 .5 7000 15mar75 15:15 Tom Applebaum 6 f 6.02214179e23 .6 6000 05jun07 08:09 Louise Mac&Cheese 7 m 6.02214209e23 .7 5000 11nov11 11:11 Jack Black 8 F 6.02214219e23 .8 4000 01jan72 14:04 Jill Hill 9 m -3 .9 3000 01apr99 03:03 Gno Avocado 10 . . 2000 01jan60 01:01 ; run; /* Table of names for foreign key constraint */ data RealPeople; input RealName $30. ; datalines; Joe Schmo Bill Hill Donna O'Fauna Rob "Bob" Cobb Tom Applebaum Louise Mac&Cheese Jack Black Jill Hill Gno Avocado Poppy LaFleur Pickett Wilson Olive Hoyle ; run; /* must be a primary key, but not necessarily have the same name */ proc sql; alter table work.RealPeople add constraint prim_key Primary key(RealName); quit; /* add some integrity constraints */ /* both general (range and list) and referential */ proc sql; alter table work.mysasdata add constraint prim_key Primary key(id) add constraint DOB_present Not Null(DOB) add constraint id_GT_0 check(id GT 0) add constraint sex_MF check(sex in (.,1,2)) add constraint avocado_unique Unique(avocado) add constraint name_fkey foreign key(name) references work.RealPeople ; quit; proc sql; describe table work.mysasdata; quit; /* proc print data=mySASdata; run; */ /* **************************************************************************** */ /* ************************** end of generate Test dataset section ********* */ /* **************************************************************************** */ %Global DDILinesize; %Let DDILinesize=1024; /* maximum line length for the XML file (longer lines will fold) */ %Global maxParamLength; %Let maxParamLength=10000; /* maximum length of one of the macro variable arguments */ %Global IdentificationElementID; %Let IdentificationElementID=%NRSTR(testDDIFromSAS); /* V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V */ /* NOTE if this is revised as a Web service, the macro variables lib and dataset should not come from a Web form */ /* ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ */ filename ddi 'C:\DDRIVE\data\DDI\DDI30\SAS\SAS2DDI3\DDI3fromSAS.xml'; /* Some formats indicate measurement units and perhaps role of the variable */ /* These are some of those formats */ /* fmtname is the root name of the format (without the w.d) */ /* represents is the measurement unit that the format represents */ /* formatDescription is the description of the format from the SAS documentation */ data formatDocu; length fmtname $ 32 represents $ 32 formatDescription $ 200; input fmtname represents &:$char32. / formatDescription &:$char200./; datalines; DATE Date Writes Date values in the form ddmmmyy or ddmmmyyyy DATEAMP DateTime Writes DateTime values in the form ddmmmyy:hh:mm:ss.ss with AM or PM DATETIME DateTime Writes DateTime values in the form ddmmmyy:hh:mm:ss.ss DAY Date Writes Date values as the day of the month DDMMYY Date Writes Date values in the form ddmm{yy} yy or dd/mm/{yy}yy , where a forward slash is the separator and the year appears as either 2 or 4 digits DDMMYYB Date Writes Date values in the form ddmm{yy} yy or ddXmm {yy}yy , the year appears as either 2 or 4 digits DDMMYYC Date Writes Date values in the form ddmm{yy} yy or ddXmm:{yy}yy , the year appears as either 2 or 4 digits DDMMYYD Date Writes Date values in the form ddmm{yy} yy or ddXmm-{yy}yy , the year appears as either 2 or 4 digits DDMMYYN Date Writes Date values in the form ddmm{yy} yy or ddXmm{yy}yy , the year appears as either 2 or 4 digits DDMMYYP Date Writes Date values in the form ddmm{yy} yy or ddXmm.{yy}yy , the year appears as either 2 or 4 digits DDMMYYS Date Writes Date values in the form ddmm{yy} yy or ddXmm/{yy}yy , the year appears as either 2 or 4 digits DOWNAME Date Writes Date values as the name of the day of the week DTDATE DateTime Expects a DateTime value as input and writes date values in the form ddmmmyy or ddmmmyyyy DTMONYY Date Writes the Date part of a datetime value as the month and year in the form mmmyy or mmmyyyy DTWKDATX Date Writes the Date part of a datetime value as the day of the week and the date in the form day-of-week, dd month-name yy (or yyyy) DTYEAR Date Writes the Date part of a datetime value as the year in the form yy or yyyy DTYYQC Date Writes the Date part of a datetime value as the year and the quarter and separates them with a colon (:) EURDFDD Date Writes international Date values in the form dd.mm.yy or dd.mm.yyyy EURDFDE Date Writes international Date values in the form ddmmmyy or ddmmmyyyy EURDFDN Date Writes international Date values as the day of the week EURDFDT Date Writes international Datetime values in the form ddmmmyy:hh:mm:ss.ss or ddmmmyyyy hh:mm:ss.ss EURDFDWN Date Writes international Date values as the name of the day EURDFMN Date Writes international Date values as the name of the month EURDFMY Date Writes international Date values in the form mmmyy or mmmyyyy EURDFWDX Date Writes international Date values as the name of the month, the day, and the year in the form dd month-name yy (or yyyy ) EURDFWKX Date Writes international Date values as the name of the day and date in the form day-of-week, dd month-name yy (or yyyy) HDATE Date Writes Date values in the form yyyy mmmmm dd where dd is the day-of-the-month, mmmmm represents the month's name in Hebrew, and yyyy is the year HEBDATE Date Writes Date values according to the Jewish calendar HHMM Time Writes Time values as hours and minutes in the form hh:mm HOUR Time Writes Time values as hours and decimal fractions of hours IS8601DA Date Writes data values in the extended format YYYY-MM-DD IS8601DN DateTime Writes datetime values with only a date portion in the extended format YYYY-MM-DD IS8601DT DateTime Writes datetime values in the extended format YYYY-MM-DDThh:mm:ss[.fffff] IS8601DZ DateTime Writes datetime values with a time zone in the extended format YYYY-MM-DDThh:mm:ss[.fffff][Z]|[[+|-]hh:mm] IS8601LZ Time Writes time values with a time zone in the extended format hh:mm:ss[.fffff][Z][+|-]hh:mm] IS8601TM Time Writes time values in the extended format hh:mm:ss[.fffff] IS8601TZ Time Writes time values with a time zone in the extended format hh:mm:ss[.fffff][Z][+|-]hh:mm] JULDAY Date Writes Date values as the Julian day of the year JULIAN Date Writes Date values as Julian dates in the form yyddd or yyyyddd MINGUO Date Writes Date values as Taiwanese dates in the form yyymmdd MMDDYY Date Writes Date values in the form mmdd{yy} yy or mm/dd/{yy}yy , where a forward slash is the separator and the year appears as either 2 or 4 digits MMDDYYC Date Writes Date values in the form mmdd{yy} yy or mmXdd:{yy}yy , the year appears as either 2 or 4 digits MMDDYYD Date Writes Date values in the form mmdd{yy} yy or mmXdd-{yy}yy , the year appears as either 2 or 4 digits MMDDYYN Date Writes Date values in the form mmdd{yy} yy or mmXdd{yy}yy , the year appears as either 2 or 4 digits MMDDYYP Date Writes Date values in the form mmdd{yy} yy or mmXdd.{yy}yy , the year appears as either 2 or 4 digits MMDDYYS Date Writes Date values in the form mmdd{yy} yy or mmXdd/{yy}yy , the year appears as either 2 or 4 digits MMSS Time Writes Time values as the number of minutes and seconds since midnight MMYY Date Writes Date values in the form mmM{yy} yy, where M is the separator and the year appears as either 2 or 4 digits MMYYx Date Writes Date values in the form mm{yy} yy or mmX{yy}yy, where X represents a specified separator and the year appears as either 2 or 4 digits MONNAME Date Writes Date values as the name of the month MONTH Date Writes Date values as the month of the year MONYY Date Writes Date values as the month and the year in the form mmmyy or mmmyyyy NENGO Date Writes Date values as Japanese dates in the form e.yymmdd NLDATE Date Converts a SAS Date value to the date value of the specified locale and then writes the value in the format of date NLDATEMN Date Converts a SAS Date value to the date value of the specified locale and then writes the date value in the format of name of month NLDATEW Date Converts a SAS Date value to the date value of the specified locale, and then writes the date value in the format of the date and the day of week NLDATEWN Date Converts the SAS Date value to the date value of the specified locale and then writes the date value in the format of the name of day of week NLDATM DateTime Converts a SAS DateTime value to the datetime value of the specified locale and then writes the value in the format of datetime NLDATMAP DateTime Converts a SAS DateTime value to the datetime value of the specified locale and then writes the value in the format of datetime with a.m. or p.m. NLDATMTM Time Converts the Time portion of a SAS datetime value to the time-of-day value of the specified locale and then writes the value in the format of time of day NLDATMW Date Converts a SAS Date value to a datetime value of the specified locale and then writes the value in the format of day of week and datetime NLTIMAP Time Converts a SAS Time value to the time value of a specified locale and then writes the value in the format of a time value with a.m. or p.m. NLTIME Time Converts a SAS Time value to the time value of the specified locale and then writes the value in the format of time PDJULG Date Writes packed Julian Date values in the hexadecimal format yyyydddF for IBM PDJULI Date Writes packed Julian Date values in the hexadecimal format ccyydddF for IBM QTR Date Writes Date values as the quarter of the year QTRR Date Writes Date values as the quarter of the year in Roman numerals TIME Time Writes Time values as hours, minutes, and seconds in the form hh:mm:ss.ss TIMEAMPM Time Writes Time values as hours, minutes, and seconds in the form hh:mm:ss.ss with AM or PM TOD Time Writes the Time portion of datetime values in the form hh:mm:ss.ss WEEKDATE Date Writes Date values as the day of the week and the date in the form day-of-week, month-name dd, yy (or yyyy) WEEKDATX Date Writes Date values as the day of the week and date in the form day-of-week, dd month-name yy (or yyyy) WEEKDAY Date Writes Date values as the day of the week WEEKU Date Writes a week number in decimal format by using the U algorithm WEEKV Date Writes a week number in decimal format by using the V algorithm WEEKW Week Reads the format of the number-of-week value within the year and returns a SAS-date value using the W algorithm WORDDATE Date Writes Date values as the name of the month, the day, and the year in the form month-name dd, yyyy WORDDATX Date Writes Date values as the day, the name of the month, and the year in the form dd month-name yyyy YEAR Date Writes Date values as the year YYMM Date Writes Date values in the form {yy}yyM mm, where M is the separator and the year appears as either 2 or 4 digits YYMM Date Writes Date values in the form {yy}yymm or {yy}yyXmm, where X represents a specified separator and the year appears as either 2 or 4 digits YYMMDD Date Writes Date values in the form {yy}yymmdd or {yy}yy-mm- dd, where a dash is the separator and the year appears as either 2 or 4 digits YYMMDDx Date Writes Date values in the form {yy}yymmdd or {yy}yyXmmXdd, where X represents a specified separator and the year appears as either 2 or 4 digits YYMON Date Writes Date values in the form yymmm or yyyymmm YYQ Date Writes Date values in the form {yy}yyQ q, where Q is the separator, the year appears as either 2 or 4 digits, and q is the quarter of the year YYQx Date Writes Date values in the form {yy}yyq or {yy}yyXq, where X represents a specified separator, the year appears as either 2 or 4 digits, and q is the quarter of the year YYQR Date Writes Date values in the form {yy}yyQ qr, where Q is the separator, the year appears as either 2 or 4 digits, and qr is the quarter of the year expressed in roman numerals YYQRx Date Writes Date values in the form {yy}yy qr or {yy}yyXqr, where X represents a specified separator, the year appears as either 2 or 4 digits, and qr is the quarter of the year expressed in Roman numerals DOLLAR Currency-dollars Writes numeric values with a leading dollar sign, a comma that separates every three digits, and a period that separates the decimal fraction DOLLARX Currency-dollars Writes numeric values with a leading dollar sign, a period that separates every three digits, and a comma that separates the decimal fraction EURO Currency-euros Writes numeric values with a leading euro symbol (E), a comma that separates every three digits, and a period that separates the decimal fraction EUROX Currency-euros Writes numeric values with a leading euro symbol (E), a period that separates every three digits, and a comma that separates the decimal fraction NLMNY Currency Writes the monetary format of the local expression in the specified locale using local currency NLMNYI Currency Writes the monetary format of the international expression in the specified locale NLPCT Proportion Writes percentage data of the local expression in the specified locale NLPCTI Proportion Writes percentage data of the international expression in the specified locale PERCENT Proportion Writes numeric values as percentages PERCENTN Proportion Produces percentages, using a minus sign for negative values. SSN Social Security Number Writes Social Security numbers ; run; /* capture the current user formats in myformats */ proc format cntlout=myformats; run; /* clean and upcase a sas name to make sure no nasty characters present */ /* allow only a-z A-Z 0-9 and _ */ %Macro SASName(macroVar=mv); %do; call symput("¯oVar",trim(upcase(prxchange('s/\W//', -1,symget("¯oVar") ) ) ) ); %end; %Mend SASName; /* escape the five characters that cannot appear in PCDATA */ %Macro Xescape(var=xxxx); %do; prxchange('s/\"/"/',-1, prxchange('s/\''/'/',-1, prxchange('s/\>/>/',-1, prxchange('s/\"; put ""; run; %mend DDIInstanceStart; %Macro IdentificationElement(); %put Start Identification Element; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; %GetParam(macroVar=IdentificationElementID); put " " param +(-1) ""; put " "; run; %mend IdentificationElement; /* Citation element */ /* Duplicate ns1:DDIInstance/r:Citation subelements that match */ /* ns1:DDIInstance/r:Citation/dce:DCElements subelements into */ /* the DCElements (Dublin Core). Explicitly set the rest of the */ /* Dublin Core elements */ %Macro CitationElement(I_C_Title=DDI file from SAS dataset, I_C_SubTitle=, I_C_AlternateTitle=, I_C_Creator=, I_C_Publisher=, I_C_Contributor=, I_C_PublicationDate=, I_C_Language=, I_C_InternationalIdentifierType=, I_C_InternationalIdentifier=, I_C_Copyright=, DCE_subject=, DCE_description=, DCE_date=, DCE_type=, DCE_format=, DCE_identifier=, DCE_source=, DCE_relation=, DCE_coverage=, DCE_rights= ); %put Citation Element; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; /* title is required */ %GetParam(macroVar=I_C_Title) put " " param +(-1) " "; %GetParam(macroVar=I_C_SubTitle) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_AlternateTitle) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Creator) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Publisher) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Contributor) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_PublicationDate) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Language) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_InternationalIdentifierType) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Copyright) if param ne " " then put " " param +(-1) " "; /* Optional Dublin Core elements */ put " "; %GetParam(macroVar=I_C_Title) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Creator) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_subject) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_description) put " "; if param ne " " then put param +(-1) ; T_memlabel=trim(symget("T_memlabel")); put "SAS Dataset Label: " T_memlabel; put "SAS Dataset Creation Date: &T_crDate"; put "SAS Dataset Modified Date: &T_modate"; put "SAS Dataset nobs: &T_nobs"; put "SAS Dataset nvar: &T_nvar"; T_encoding=trim(symget("T_encoding")); put "SAS Dataset encoding: " T_encoding; put " "; %GetParam(macroVar=I_C_Publisher) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Contributor) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_date) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_type) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_format) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_identifier) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_source) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Language) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_relation) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_coverage) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_rights) if param ne " " then put " " param +(-1) " "; put " "; put " "; put " "; run; %mend CitationElement; %Macro StudyUnitStart(SU_ID=StudyUnit_001, SU_Version=1.0, SU_VersionDate=, SU_VersionResponsibility=Responsible Party, SU_Title=Study Title, SU_Creator=Study Creator, SU_Publisher=Study Publisher, SU_Contributer=Study Contributer, SU_Abstract=Abstract for the study appears here, SU_FundingAgencyID=ID of Funding Agency could go here, SU_FundingAgencyURN=URN of funding agency could go here, SU_GrantNumber=Grant number could go here, SU_Purpose=The purpose of the data goes here, SU_TopicalKeyword1=Keyword1, SU_TopicalKeyword2=Keyword2, SU_SpatialCoverageDescription=A description of the spatial coverage goes here, SU_GeographyName1=Name of first geography, SU_GeographyName2=Name of second geography, SU_StartDate=, SU_EndDate=, SU_AnalysisUnit=Unit of analysis described here, SU_GeographyNote=A note about the geography goes here, SU_Note=A general note about the Study Unit goes here); %put Start Study Unit; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; %GetParam(macroVar=SU_ID) put " " param +(-1) ""; %GetParam(macroVar=SU_Version) put " " param +(-1) ""; %GetParam(macroVar=SU_VersionDate) if param ne " " then put " " param +(-1) ""; %GetParam(macroVar=SU_VersionResponsibility) put " " param +(-1) ""; put " "; put " "; %GetParam(macroVar=SU_Title) put " " param +(-1) ""; %GetParam(macroVar=SU_Creator) put " " param +(-1) ""; %GetParam(macroVar=SU_Publisher) put " " param +(-1) ""; %GetParam(macroVar=SU_Contributer) put " " param +(-1) ""; put " "; put " "; put " Abstract_001"; %GetParam(macroVar=SU_Abstract) put " " param +(-1) ""; put " "; put " "; put " Universe_001"; put " "; put " "; put " "; %GetParam(macroVar=SU_FundingAgencyID) put " "; put " " param +(-1) ""; %GetParam(macroVar=SU_FundingAgencyURN) put " " param +(-1) ""; put " "; put " "; %GetParam(macroVar=SU_GrantNumber) put " " param +(-1) ""; put " "; put " "; put " Purpose_001"; %GetParam(macroVar=SU_Purpose) put " " param +(-1) ""; put " "; put " "; put " "; put " "; put " TopicalCoverage_001"; put " "; %GetParam(macroVar=SU_TopicalKeyword1) put " " param +(-1) ""; %GetParam(macroVar=SU_TopicalKeyword2) put " " param +(-1) ""; put " "; put " "; put " "; put " SpatialCoverage_001"; put " "; %GetParam(macroVar=SU_SpatialCoverageDescription) put " " param +(-1) ""; put " "; put " "; put " "; put " Geography_001"; put " "; put " "; %GetParam(macroVar=SU_GeographyName1) put " " param +(-1) ""; put " "; put " "; put " "; put " "; put " Geography_002"; put " "; put " "; %GetParam(macroVar=SU_GeographyName2) put " " param +(-1) ""; put " "; put " "; put " "; put " "; put " Geography_001"; put " "; put " "; put " Geography_002"; put " "; put " "; put " "; put " "; put " TemporalCoverage_001"; put " "; /* must have a non blank start date to have a Reference Date */ /* end date may be missing or not if start date is present */ /* an end date with a missing start date is invalid */ %GetParam(macroVar=SU_StartDate) if param ne " " then do; put " "; put " " param +(-1) ""; %GetParam(macroVar=SU_EndDate) put " " param +(-1) ""; put " "; end; /* SU_StartDate non blank */ put " "; put " "; %GetParam(macroVar=SU_AnalysisUnit) put " " param +(-1) ""; put " "; put " "; put " Geography_001"; put " "; put " "; put " Geography_002"; put " "; %GetParam(macroVar=SU_GeographyNote) put " " param +(-1) ""; put " "; put " "; put " "; put " StudyUnit_001"; put " "; %GetParam(macroVar=SU_Note) put " " param +(-1) "" ; put " "; put " "; run; %mend StudyUnitStart; %Macro ConceptualComponent(C_UniverseDescription=A description of the Study Universe goes here); /* ****** ConceptualComponent ******* */ %put Start Conceptual Component; data concept; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; set MyColumns2 end=last; by name; if _n_=1 then do; put " "; put " ConceptualComponent_001"; put " "; put " ConceptScheme_001"; end; /* create a concept for each user format */ /* the best wy can do for the concept is to use the variable label if present */ put " "; put " " xName +(-1) ""; if xLabel =" " then put " Concept for " xName +(-1) ""; else put " " xLabel +(-1) ""; put " "; if last then do; put " "; put " "; put " UniverseScheme_001"; put " "; put " Universe_001 "; %GetParam(macroVar=C_UniverseDescription) put " Universe_001: " param +(-1) ""; put " "; put " "; put " "; put " "; end; run; %mend ConceptualComponent; %Macro DataCollectionStart(D_ID=DataCollection_001, D_StudyNote=A note about the data collection at the Study Unit level goes here, D_GeographyNote=A note about the data collection geography goes here, D_TimeNote1=A note about the data collection time goes here, D_TimeNote2=Another note about the data collection time goes here, D_Sampling=A note about sampling goes here, D_DataSource=A note about the data source goes here, D_StartDate=, D_EndDate=, D_CollectionMode=A note about the mode of collection goes here ); %put Start Data Collection; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; put " "; %GetParam(macroVar=D_ID) put " " param +(-1) ""; put " "; put " "; put " "; put " StudyUnit_001"; put " "; %GetParam(macroVar=D_StudyNote) put " " param +(-1) ""; put " "; put " "; put " "; put " StudyUnit_001"; put " "; put " "; put " Geography_001"; put " "; put " "; put " Geography_002"; put " "; %GetParam(macroVar=D_GeographyNote) put " " param +(-1) ""; put " "; put " "; put " "; put " "; put " TimeMethod_001"; put " "; %GetParam(macroVar=D_TimeNote1) put " " param +(-1) ""; put " "; put " "; put " "; put " TimeMethod_002"; put " "; %GetParam(macroVar=D_TimeNote2) put " " param +(-1) ""; put " "; put " "; put " "; put " Sampling_001"; put " "; %GetParam(macroVar=D_Sampling) put " " param +(-1) ""; put " "; put " "; put " "; put " "; %GetParam(macroVar=D_DataSource) put " " param +(-1) ""; put " "; /* must have a non blank start date to have a DataCollectionFrequency */ /* end date may be missing or not if start date is present */ /* an end date with a missing start date is invalid */ %GetParam(macroVar=D_StartDate) if param ne " " then do; put " "; put " " param +(-1) ""; %GetParam(macroVar=D_EndDate) put " " param +(-1) ""; put " "; end; /* D_StartDate non blank */ put " "; put " "; put " CollectionMode_001"; put " "; %GetParam(macroVar=D_CollectionMode) put " " param +(-1) ""; put " "; put " "; run; %mend DataCollectionStart; %Macro QuestionSchemeStart(QuestionScheme_ID=QuestionScheme_001); %put Start Question Scheme; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; put " "; %GetParam(macroVar=QuestionScheme_ID) put " " param +(-1) ""; put " "; run; %mend QuestionSchemeStart; %Macro QuestionItem(QuestionItem_ID=Question, QuestionItem_Text=What is the question?, QuestionItem_CodeScheme=Answer42); %put QuestionItem; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; put " "; %GetParam(macroVar=QuestionItem_ID) put " " param +(-1) ""; put " "; put " "; put " "; %GetParam(macroVar=QuestionItem_Text) put " " param +(-1) ""; put " "; put " "; put " "; put " "; %GetParam(macroVar=QuestionItem_CodeScheme) put " " param +(-1) ""; put " "; put " "; put " "; run; %mend QuestionItem; %Macro QuestionSchemeEnd(); %put End Question Scheme; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; run; %mend QuestionSchemeEnd; %Macro DataCollectionEnd(); %put End Data Collection; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; put " "; put " "; put " Cleaning consisted in looking through the entered data and ensuring proper XML format"; put " "; put " "; put " "; put " "; put " Weighting_001"; put " "; put " "; put " No weighting is necessary"; put " "; put " "; put " "; put " 100%"; put " no sampling error"; put " "; put " "; put " "; put " "; run; %mend DataCollectionEnd; %Macro LogicalProductStart(L_ID=LogicalProduct_001); %put Start Logical Product; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; %GetParam(macroVar=L_ID) put " " param +(-1) ""; run; %mend LogicalProductStart; /* ****** category Scheme generated from the SAS dataset ******* */ %Macro CategoryScheme(); %put Catagory Scheme; data uniqueLabels2; set uniquelabels; by fmtname label; file ddi mod Linesize=&DDILinesize; if first.fmtname then do; put " "; put " Cats_" fmtname +(-1) ""; end; CatID=trim(fmtname)||"_"||left(put(_n_,8.)); put " "; put " " CatID +(-1) ""; put " " label +(-1) ""; put " "; if last.fmtname then do; put " "; put " "; end; run; %Mend CategoryScheme; /* ****** code Scheme generated from the SAS dataset ******* */ %Macro CodeScheme(); proc sql; create table userFormats2 as select userFormats.*, CatID from UniqueLabels2 as L,userFormats as F where L.fmtname=F.fmtname and L.label=F.label order by fmtname,start; quit; %put Code Scheme; data codes; length param $ &maxParamLength; set userFormats2; by fmtname start; file ddi mod Linesize=&DDILinesize; if first.fmtname then do; CodeSchemeID="Codes_"||trim(fmtname); put " "; put " " CodeSchemeID +(-1) ""; put " Cats_" fmtName +(-1) ""; end; if start=end then do; CodeID="C_"||trim(fmtname)||"_"||left(put(_n_,8.)); put " "; put " "; put " " CatID +(-1) ""; put " "; put " " start +(-1) ""; put " "; end; if last.fmtname then do; put " "; put " "; end; run; %Mend CodeScheme; /* ****** variable Scheme generated from the SAS dataset ******* */ %Macro VariableScheme(); %put Variable Scheme; data vars; length param $ &maxParamLength; set myColumns2 end=last; by name; file ddi mod Linesize=&DDILinesize; if _n_=1 then do; put " "; put " VariableScheme_001"; end; put " "; put " " xName +(-1) "" xName +(-1) ""; put " " xLabel +(-1) ""; /* SAS specific information goes here */ put " " @; put "SAS varnum: " varnum @; if format ne " " then put " SAS format: " format @; if informat ne " " then put " SAS informat " informat @; if idxusage ne " " then put " SAS idxusage " idxusage @; if sortedby ne 0 then put " SAS sortedby " sortedby @; if notnull ne "no" then put " SAS notnull " notnull @; if precision ne . then put " SAS precision " precision @; if scale ne . then put " SAS scale " scale @; put " SAS transcode: " transcode @; if ConstraintNote ne " " then put ConstraintNote @; put ""; put " " xName +(-1) ""; /* THIS SECTION NEEDS to be expanded to capture any other formats */ /* that indicate units of measurement */ put " "; if formatDescription ne " " then put " SAS format indicates: " formatDescription +(-1) ""; if type='num' then select (represents); when ("Date") put " "; when ("DateTime") put " "; when ("Time") put " "; otherwise put " "; end;/* type="num" */ else put " "; /* code scheme reference only for user formats */ if uFmtName ne " " then do; put " "; put " "; put " "; put " Codes_" xName +(-1) ""; put " "; put " "; put " "; end; put " "; put " "; put " "; if last then do; put " "; put " "; end; run; %Mend VariableScheme; %Macro LogicalProductEnd(); %put End Logical Product; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; run; %mend LogicalProductEnd; %Macro PhysicalDataProductStart(PD_ID=PhysicalDataProduct_001, PD_LogicalProductReference=LogicalProduct_001, PD_GrossRecordStructureID=GrossRecordStructure_001); %put Start Physical Product; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; %GetParam(macroVar=PD_ID) put " " param +(-1) ""; %GetParam(macroVar=PD_LogicalProductReference) put " " param +(-1) ""; put " "; put " "; %GetParam(macroVar=PD_GrossRecordStructureID) put " " param +(-1) ""; put " "; put " ??????"; put " "; put " "; put " EmbeddedData"; put " Data are embedded in this DDI file"; put " "; run; %mend PhysicalDataProductStart; /* ****** DataItems generated from the SAS dataset ******* */ %Macro DataItems(); proc sort data=MyColumns; by varnum; run; %put Data Items; data _null_; length param $ &maxParamLength; set myColumns end=last; by varnum; file ddi mod Linesize=&DDILinesize; put " "; put " " xName +(-1) ""; put " "; if last then put " "; run; %Mend DataItems; /* ****** dataset generated from the SAS Dataset ******* */ %Macro Dataset(); %put Dataset; data ds; set &lib..&dataset end=_last; file ddi mod Linesize=&DDILinesize; array nums{*} _numeric_; array chars{*} _character_; length _MyVarname $32; length _myValue $2000; if _n_=1 then do; put " "; put " Dataset_001"; put " Dataset_001"; put " VariableScheme_001"; end; /* for each row of data */ put " "; /* this approach requires outputting */ /* all of the numeric variables together */ /* and all of the character variables together */ /* Original column order is preserved in the VariableScheme */ /* Proc Transpose approach? */ do i=1 to dim(nums); /* get name of numeric variable */ call vname(nums{i},_MyVarname); put " "; put " " _MyVarname +(-1) ""; /* */ _myFMT=prxchange('s/\d*\.\d*//',1,vformat(nums{i})) ; /* numeric values - empty element if missing */ if nums{i} <= .Z then put " "; /* format date/time vars appropriately */ else do; select (_myFMT); when ("IS8601DA","DATE") put " " nums{i} IS8601DA. ""; when ("IS8601DT","DATETIME") put " " nums{i} IS8601DT. ""; when ("IS8601TM","TIME") put " " nums{i} IS8601TM. ""; otherwise do; /* left justify */ _MyValue=left(put(nums{i}, BEST18.)); put " " _MyValue ""; end; end; /* select */ end; /* if nums{i} <= .Z */ put " "; end; do i=1 to dim(chars); /* get name of character variable */ call vname(chars{i},_MyVarname); _MyValue=%Xescape(var=chars{i}); put " "; put " " _MyVarname +(-1) ""; put " " _MyValue ""; put " "; end; put " "; if _last then do; put " "; end; run; %Mend DataSet; %Macro PhysicalDataProductEnd(); %put End Physical Product; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; put " "; run; %mend PhysicalDataProductEnd; %Macro StudyUnitEnd(); %put End Study Unit; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; put " "; run; %mend StudyUnitEnd; %Macro DDIInstanceEnd(); %put End DDI Instance; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; put " "; put " "; %GetParam(macroVar=IdentificationElementID) put " " param +(-1) ""; put " "; put " The original version of this DDI file was generated from a SAS dataset" ; put " "; put " "; put ""; run; %mend DDIInstanceEnd; /* ********************************************* */ /* write out a One Itemval */ /* ********************************************* */ %macro ItemVal(var=MIN, type=N); /* let SAS do the type conversion */ put ' '; put " &var."; /* escape any characters invalid for XML */ %if &type=N %then %do; put ' ' &var. ''; %end; %else %do; strvalue=&var.; strvalue = %Xescape(var=strvalue); put ' ' strvalue $ ''; %end; put ' '; %mend ItemVal; /* ********************************************* */ /* write out a Studyunit for the cntlout dataset */ /* ********************************************* */ %Macro CntloutXML(dataset=myformats); %put StudyUnit for CNTLOUT Dataset; data _null_; file ddi mod Linesize=&DDILinesize; set &dataset. end=last; if _n_=1 then do; put ' '; put ' StudyUnit_SAS_cntlout'; put ' 1.0'; put ' IPSR - The University of Kansas'; put ' '; put ' '; put ' Metadata for the standard SAS cntlin cntlout dataset'; put ' Larry Hoyle'; put ' Institute for Policy & Social Research; University of Kansas'; put ' '; put ' '; put ' '; put ' AbstractCntlout_001'; put ' SAS PROC format can output a dataset containg all of the information needed to recreate the active formats and informats. This file can be used to recreate the environment in which a SAS dataset existed.'; put ' '; put ' '; put ' UniverseCntlout_001'; put ' '; put ' '; put ' '; put ' '; put ' none'; put ' none'; put ' '; put ' '; put ' none'; put ' '; put ' '; put ' PurposeCntlout_001'; put ' These data were generated by a SAS program to test SAS programs exporting data to DDI 3. They were then hand edited to add additional codes, etc..'; put ' '; put ' '; put ' '; put ' '; put ' TopicalCoverageCntlout_001'; put ' '; put ' SAS'; put ' DDI'; put ' CNTLIN'; put ' CNTLOUT'; put ' '; put ' '; put ' '; put ' SpatialCoverageCntlout_001'; put ' '; put ' The data were generated in Lawrence Kansas but do not apply to this location'; put ' '; put ' '; put ' '; put ' GeographyCntlout_001'; put ' '; put ' '; put ' Kansas'; put ' '; put ' '; put ' '; put ' '; put ' Geography_002'; put ' '; put ' '; put ' Lawrence'; put ' '; put ' '; put ' '; put ' '; put ' GeographyCntlout_001'; put ' '; put ' '; put ' Geography_002'; put ' '; put ' '; put ' '; put ' '; put ' TemporalCoverageCntlout_001'; put ' '; put ' '; put ' '; put ' SAS Format or Informat '; put ' '; put ' ConceptualComponentCntlout_001'; put ' '; put ' ConceptSchemeCntlout_001'; put ' '; put ' DATATYPE'; put ' Date/time/datetime?'; put ' '; put ' '; put ' DECSEP'; put ' Decimal separator'; put ' '; put ' '; put ' DEFAULT'; put ' Default length, a numeric variable that indicates the default length for format or informat'; put ' '; put ' '; put ' DIG3SEP'; put ' Three-digit separator'; put ' '; put ' '; put ' EEXCL'; put ' End exclusion, a character variable that indicates whether the range''s ending value is excluded.'; put ' '; put ' '; put ' END'; put ' Ending value for format, a character variable that gives the range''s ending value'; put ' '; put ' '; put ' FILL'; put ' Fill character, for picture formats, a numeric variable whose value is the value of the FILL= option'; put ' '; put ' '; put ' FMTNAME'; put ' Format name, a character variable whose value is the format or informat name'; put ' '; put ' '; put ' FUZZ'; put ' Fuzz value, a numeric variable whose value is the value of the FUZZ= option'; put ' '; put ' '; put ' HLO'; put ' Additional information, a character variable that contains range information about the format or informat in the form of eight different letters that can appear in any combination.'; put ' '; put ' '; put ' LABEL'; put ' Format value label, a character variable whose value is the informatted or formatted value or the name of an existing informat or format'; put ' '; put ' '; put ' LANGUAGE'; put ' Language for date strings'; put ' '; put ' '; put ' LENGTH'; put ' Format length, a numeric variable whose value is the value of the LENGTH= option'; put ' '; put ' '; put ' MAX'; put ' Maximum length, a numeric variable whose value is the value of the MAX= option'; put ' '; put ' '; put ' MIN'; put ' Minimum length, a numeric variable whose value is the value of the MIN= option'; put ' '; put ' '; put ' MULT'; put ' Multiplier, a numeric variable whose value is the value of the MULT= option'; put ' '; put ' '; put ' NOEDIT'; put ' Is picture string noedit? for picture formats, a numeric variable whose value indicates whether the NOEDIT option is in effect.'; put ' '; put ' '; put ' PREFIX'; put ' Prefix characters, for picture formats, a character variable whose value is the value of the PREFIX= option'; put ' '; put ' '; put ' SEXCL'; put ' Start exclusion, a character variable that indicates whether the range''s starting value is excluded. '; put ' '; put ' '; put ' START'; put ' Starting value for format, a character variable that gives the range''s starting value'; put ' '; put ' '; put ' TYPE'; put ' Type of format, a character variable that indicates the type of format.'; put ' '; put ' '; put ' '; put ' UniverseSchemeCntlout_001'; put ' '; put ' UniverseCntlout_001 '; put ' UniverseCntlout_001: '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' Cntlout_001'; put ' '; put ' '; put ' '; put ' StudyUnit_SAS_cntlout'; put ' '; put ' CNTLOUT files are documented under Output Control Data Set in the Format Procedure documentation'; put ' '; put ' '; put ' '; put ' StudyUnit_SAS_cntlout'; put ' '; put ' '; put ' GeographyCntlout_001'; put ' '; put ' '; put ' Geography_002'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' 2008-03-05'; put ' 2008-03-05'; put ' '; put ' '; put ' '; put ' CollectionModeCntlout_001'; put ' '; put ' A PROC FORMAT generated an initial cntlout data table with a cariety of format and informat types. Information from the SAS documentation was then inserted by hand.'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' oXyent 9.1 was used to edit and ensure validity of the XML.'; put ' '; put ' '; put ' '; put ' '; put ' WeightingCntlout_001'; put ' '; put ' '; put ' No weighting is necessary'; put ' '; put ' '; put ' '; put ' 100%'; put ' no sampling error'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' LogicalProductCntlout_001'; put ' '; put ' Cats_EEXCL'; put ' '; put ' EEXCL_1'; put ' The range''s ending value is excluded'; put ' '; put ' '; put ' EEXCL_2'; put ' the range''s ending value is not excluded'; put ' '; put ' '; put ' '; put ' '; put ' Cats_HLO'; put ' '; put ' HLO_3'; put ' MULTILABEL option is in effect'; put ' '; put ' '; put ' HLO_4'; put ' NOTSORTED option is in effect'; put ' '; put ' '; put ' HLO_5'; put ' ROUND option is in effect'; put ' '; put ' '; put ' HLO_6'; put ' format or informat has no ranges, including no OTHER= range'; put ' '; put ' '; put ' HLO_7'; put ' numeric informat range (informat defined with unquoted numeric range)'; put ' '; put ' '; put ' HLO_8'; put ' range is OTHER'; put ' '; put ' '; put ' HLO_9'; put ' range''s ending value is HIGH'; put ' '; put ' '; put ' HLO_10'; put ' range''s starting value is LOW'; put ' '; put ' '; put ' HLO_11'; put ' standard SAS format or informat used for formatted value or informatted value'; put ' '; put ' '; put ' '; put ' '; put ' Cats_NOEDIT'; put ' '; put ' NOEDIT_12'; put ' NOEDIT option is in effect'; put ' '; put ' '; put ' NOEDIT_13'; put ' NOEDIT option is not in effect'; put ' '; put ' '; put ' '; put ' '; put ' Cats_SEXCL'; put ' '; put ' SEXCL_14'; put ' the range''s starting value is excluded'; put ' '; put ' '; put ' SEXCL_15'; put ' the range''s starting value is not excluded'; put ' '; put ' '; put ' '; put ' '; put ' Cats_TYPE'; put ' '; put ' TYPE_16'; put ' character format'; put ' '; put ' '; put ' TYPE_17'; put ' character informat'; put ' '; put ' '; put ' TYPE_18'; put ' numeric format (excluding pictures)'; put ' '; put ' '; put ' TYPE_19'; put ' numeric informat'; put ' '; put ' '; put ' TYPE_20'; put ' picture format'; put ' '; put ' '; put ' '; put ' '; put ' Codes_EEXCL'; put ' Cats_EEXCL'; put ' '; put ' '; put ' EEXCL_2'; put ' '; put ' N'; put ' '; put ' '; put ' '; put ' EEXCL_1'; put ' '; put ' Y'; put ' '; put ' '; put ' '; put ' '; put ' Codes_HLO'; put ' Cats_HLO'; put ' '; put ' '; put ' HLO_11'; put ' '; put ' F'; put ' '; put ' '; put ' '; put ' HLO_9'; put ' '; put ' H'; put ' '; put ' '; put ' '; put ' HLO_7'; put ' '; put ' I'; put ' '; put ' '; put ' '; put ' HLO_10'; put ' '; put ' L'; put ' '; put ' '; put ' '; put ' HLO_3'; put ' '; put ' M'; put ' '; put ' '; put ' '; put ' HLO_6'; put ' '; put ' N'; put ' '; put ' '; put ' '; put ' HLO_8'; put ' '; put ' O'; put ' '; put ' '; put ' '; put ' HLO_5'; put ' '; put ' R'; put ' '; put ' '; put ' '; put ' HLO_4'; put ' '; put ' S'; put ' '; put ' '; put ' '; put ' '; put ' Codes_NOEDIT'; put ' Cats_NOEDIT'; put ' '; put ' '; put ' NOEDIT_13'; put ' '; put ' 0'; put ' '; put ' '; put ' '; put ' NOEDIT_12'; put ' '; put ' 1'; put ' '; put ' '; put ' '; put ' '; put ' Codes_SEXCL'; put ' Cats_SEXCL'; put ' '; put ' '; put ' SEXCL_15'; put ' '; put ' N'; put ' '; put ' '; put ' '; put ' SEXCL_14'; put ' '; put ' Y'; put ' '; put ' '; put ' '; put ' '; put ' Codes_TYPE'; put ' Cats_TYPE'; put ' '; put ' '; put ' TYPE_16'; put ' '; put ' C'; put ' '; put ' '; put ' '; put ' TYPE_19'; put ' '; put ' I'; put ' '; put ' '; put ' '; put ' TYPE_17'; put ' '; put ' J'; put ' '; put ' '; put ' '; put ' TYPE_18'; put ' '; put ' N'; put ' '; put ' '; put ' '; put ' TYPE_20'; put ' '; put ' P'; put ' '; put ' '; put ' '; put ' VariableSchemeCntlout_001'; put ' '; put ' DATATYPEDATATYPE'; put ' Date/time/datetime?'; put ' SAS varnum: 20 SAS transcode: yes '; put ' DATATYPE'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' DECSEPDECSEP'; put ' Decimal separator'; put ' SAS varnum: 18 SAS transcode: yes '; put ' DECSEP'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' DEFAULTDEFAULT'; put ' Default length'; put ' SAS varnum: 7 SAS transcode: yes '; put ' DEFAULT'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' DIG3SEPDIG3SEP'; put ' Three-digit separator'; put ' SAS varnum: 19 SAS transcode: yes '; put ' DIG3SEP'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' EEXCLEEXCL'; put ' End exclusion'; put ' SAS varnum: 16 SAS transcode: yes '; put ' EEXCL'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' ENDEND'; put ' Ending value for format'; put ' SAS varnum: 3 SAS transcode: yes '; put ' END'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' FILLFILL'; put ' Fill character'; put ' SAS varnum: 12 SAS transcode: yes '; put ' FILL'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' FMTNAMEFMTNAME'; put ' Format name'; put ' SAS varnum: 1 SAS transcode: yes '; put ' FMTNAME'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' FUZZFUZZ'; put ' Fuzz value'; put ' SAS varnum: 9 SAS transcode: yes '; put ' FUZZ'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' HLOHLO'; put ' Additional information'; put ' SAS varnum: 17 SAS transcode: yes '; put ' HLO'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' LABELLABEL'; put ' Format value label'; put ' SAS varnum: 4 SAS transcode: yes '; put ' LABEL'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' LANGUAGELANGUAGE'; put ' Language for date strings'; put ' SAS varnum: 21 SAS transcode: yes '; put ' LANGUAGE'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' LENGTHLENGTH'; put ' Format length'; put ' SAS varnum: 8 SAS transcode: yes '; put ' LENGTH'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' MAXMAX'; put ' Maximum length'; put ' SAS varnum: 6 SAS transcode: yes '; put ' MAX'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' MINMIN'; put ' Minimum length'; put ' SAS varnum: 5 SAS transcode: yes '; put ' MIN'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' MULTMULT'; put ' Multiplier'; put ' SAS varnum: 11 SAS transcode: yes '; put ' MULT'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' NOEDITNOEDIT'; put ' Is picture string noedit?'; put ' SAS varnum: 13 SAS transcode: yes '; put ' NOEDIT'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' PREFIXPREFIX'; put ' Prefix characters'; put ' SAS varnum: 10 SAS transcode: yes '; put ' PREFIX'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' SEXCLSEXCL'; put ' Start exclusion'; put ' SAS varnum: 15 SAS transcode: yes '; put ' SEXCL'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' STARTSTART'; put ' Starting value for format'; put ' SAS varnum: 2 SAS transcode: yes '; put ' START'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' TYPETYPE'; put ' Type of format'; put ' SAS varnum: 14 SAS transcode: yes '; put ' TYPE'; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' '; put ' PhysicalDataProductCntlout_001'; put ' LogicalProductCntlout_001'; put ' '; put ' '; put ' GrossRecordStructureCntlout_001'; put ' '; put ' ??????'; put ' '; put ' '; put ' EmbeddedData'; put ' Data are embedded in this DDI file'; put ' '; put ' '; put ' FMTNAME'; put ' '; put ' '; put ' START'; put ' '; put ' '; put ' END'; put ' '; put ' '; put ' LABEL'; put ' '; put ' '; put ' MIN'; put ' '; put ' '; put ' MAX'; put ' '; put ' '; put ' DEFAULT'; put ' '; put ' '; put ' LENGTH'; put ' '; put ' '; put ' FUZZ'; put ' '; put ' '; put ' PREFIX'; put ' '; put ' '; put ' MULT'; put ' '; put ' '; put ' FILL'; put ' '; put ' '; put ' NOEDIT'; put ' '; put ' '; put ' TYPE'; put ' '; put ' '; put ' SEXCL'; put ' '; put ' '; put ' EEXCL'; put ' '; put ' '; put ' HLO'; put ' '; put ' '; put ' DECSEP'; put ' '; put ' '; put ' DIG3SEP'; put ' '; put ' '; put ' DATATYPE'; put ' '; put ' '; put ' LANGUAGE'; put ' '; put ' '; put ' '; put ' DatasetCntlout_001'; put ' DatasetCntlout_001'; put ' VariableSchemeCntlout_001'; put ''; end; %ItemVal(var=MIN, type=N); %ItemVal(var=MAX, type=N); %ItemVal(var=DEFAULT, type=N); %ItemVal(var=LENGTH, type=N); %ItemVal(var=FUZZ, type=N); %ItemVal(var=MULT, type=N); %ItemVal(var=NOEDIT, type=N); %ItemVal(var=FMTNAME, type=C); %ItemVal(var=START, type=C); %ItemVal(var=END, type=C); %ItemVal(var=LABEL, type=C); %ItemVal(var=PREFIX, type=C); %ItemVal(var=FILL, type=C); %ItemVal(var=TYPE, type=C); %ItemVal(var=SEXCL, type=C); %ItemVal(var=EEXCL, type=C); %ItemVal(var=HLO, type=C); %ItemVal(var=DECSEP, type=C); %ItemVal(var=DIG3SEP, type=C); %ItemVal(var=DATATYPE, type=C); %ItemVal(var=LANGUAGE, type=C); if last then do; put ''; put ' '; put ' '; put ' '; put ' '; put ' '; end; run; %mend CntloutXML; /* ******************************************************************************* */ /* ********* SAS2DDI Set options for the DDI File Here ************** */ /* ******************************************************************************* */ %Macro SAS2DDI(lib=work, dataset=mySASdata, XMLfile=ddi); /* lib- the library containing the dataset to output */ /* dataset- the dataset to output */ /* XMLfile- a FILEREF to the DDI XML file to be written */ /* ^^^^^^^^^^^^^ TO DO: clean the arguments ^^^^^^^^^^^^^^^^^ */ /* ^^^^^^^^^^^^^ in case this is later used in a Web app ^^^^^^^^^^^^^^^^^ */ data t; length mv $ 200; mv=symget('lib'); put 'cleaning ' mv; %SASName(macroVar=lib); mv=symget('dataset'); put 'cleaning ' mv; %SASName(macroVar=dataset); run; %put lib = &lib; %put dataset = &dataset; /* extract the table metadata put in macro variables */ proc sql ; create table TableMetadata as select *, %Xescape(var=memlabel) as xmemlabel from dictionary.tables where libname="&lib" and memname="&dataset"; ; select trim(xmemlabel), trim(put(crdate,datetime19.)), trim(put(modate,datetime19.)), trim(put(nobs,10.0)), trim(put(nvar,10.0)), trim(encoding) into :T_memlabel, :T_crDate, :T_modate, :T_nobs, :T_nvar, :T_encoding from TableMetadata ; quit; %put T_memlabel, &T_memlabel; %put T_crDate, &T_crDate; %put T_modate, &T_modate; %put T_nobs, &T_nobs; %put T_nvar, &T_nvar; %put T_encoding, &T_encoding; proc sql; /* extract the column metadata and escape characters where necessary */ create table myColumnsA as select *, prxchange('s/\d*\.\d*//',1,format) as fmtName, %Xescape(var=name) as xname, %Xescape(var=label) as xlabel from dictionary.columns where libname="&lib" and memname="&dataset" order by name; create table myColumns as select MyColumnsA.*, represents, %Xescape(var=formatDescription) as formatDescription from MyColumnsA left join formatDocu on MycolumnsA.fmtname=formatDocu.fmtname order by name; /* extract the format metadata for our columns */ proc sql; create table userFormats as select myFormats.* from myformats,myColumns where prxchange('s/^\$//',1,myColumns.fmtName)=myFormats.fmtname and myFormats.type in ("N","C"); /* make a unique list of the labels from the formats useful for concepts */ create table uniqueLabels as select distinct fmtname, label from userformats order by fmtname, label; create table MyColumns2a as select MyColumns.*, uFmtName from MyColumns left join (select distinct fmtname as uFmtName label="User Format Name" from userformats) as uf on MyColumns.fmtname=uf.ufmtname order by name; quit; /* ***************************************************** */ /* Capture metadata from integrity constraints */ /* unlike dictionary tables, proc contents displays the */ /* referenced table for a foreign key */ /* capture that reference along with other constraints */ /* ***************************************************** */ /* create empty table first and then insert into that table in case there are no constraints */ proc sql; create table WORK.IC1( label='Integrity Constraints' bufsize=16384 ) ( Member char(256), Num num format=1. label='#', 'Constraint'n char(14) label='Integrity Constraint', Type char(11), Variables char(7), WhereClause char(16) label='Where Clause', Reference char(15), OnDelete char(8) label='On Delete', OnUpdate char(8) label='On Update' ); quit; ods output Contents.DataSet.IntegrityConstraints=work.IC0; proc contents data=&lib..&dataset; run; ods output close; /* count the number of foreign key references */ /* if there are none there will be no refreence, OnDelete and OnUpdate columns */ proc sql noprint; select count(*) as NumForeigns into :NumForeigns from dictionary.columns where libname="WORK" and memname="IC0" and name="Reference"; quit; %put number of foreign key references is &NumForeigns; %IF &NumForeigns EQ 0 %THEN %DO; proc sql; insert into work.ic1( Member , Num , Constraint , Type , Variables , WhereClause ) select IC0.Member , IC0.Num , IC0.Constraint , IC0.Type , IC0.Variables , IC0.WhereClause from work.IC0; quit; %END; %ELSE %Do; proc sql; insert into work.ic1( Member , Num , Constraint , Type , Variables , WhereClause , Reference , OnDelete , OnUpdate ) select IC0.Member , IC0.Num , IC0.Constraint , IC0.Type , IC0.Variables , IC0.WhereClause , IC0.Reference , IC0.OnDelete , IC0.OnUpdate from work.IC0; quit; %END; /* for testing ^^^^^^^^^^^^^^^^^^^^^^^^^^ proc sql; create table d as select dict.* from DICTIONARY.CONSTRAINT_COLUMN_USAGE as dict where dict.table_catalog=upcase(symget("lib")) and dict.table_name=upcase(symget("dataset")) order by Column_name; quit; */ /* create empty table first and then insert into that table in case there are no constraints */ proc sql; create table work.dict ( table_catalog char(8) label='Libname', table_schema char(8) label='Table Schema', table_name char(32) label='Table', column_name char(32) label='Column', constraint_catalog char(32) label='Constraint Catalog', constraint_schema char(8) label='Constraint Schema', constraint_name char(32) label='Constraint Name' ); insert into work.dict ( table_catalog , table_schema , table_name , column_name , constraint_catalog , constraint_schema , constraint_name ) select table_catalog , table_schema , table_name , column_name , constraint_catalog , constraint_schema , constraint_name from DICTIONARY.CONSTRAINT_COLUMN_USAGE as dictionary where dictionary.table_catalog="&lib" and dictionary.table_name="&dataset"; proc sql; create table ColumnConstraints as select dict.Column_name, ic1.constraint, ic1.Type, ic1.Variables, ic1.WhereClause, ic1.Reference, ic1.OnDelete, ic1.OnUpdate from ic1, dict where ic1.constraint=dict.constraint_name order by Column_name, type; quit; /* concatenate all of the individual constraints into a single note for each variable */ data constraintNotes; set ColumnConstraints; by Column_name; length ConstraintNote $ 200; retain ConstraintNote; keep Column_Name ConstraintNote; if first.Column_name then do; ConstraintNote="SAS Integrity Constraints: "; end; select (upcase(type)); when ("NOT NULL")ConstraintNote=trim(ConstraintNote)||" Not Null (Variables " || trim(Variables) || ") " ; when ("UNIQUE")ConstraintNote=trim(ConstraintNote)||" Unique (Variables " || trim(Variables) || ") " ; when ("CHECK")ConstraintNote=trim(ConstraintNote)||" Check(Where " || trim(WhereClause) || ") " ; when ("FOREIGN KEY")ConstraintNote=trim(ConstraintNote)||" Foreign Key(Variables [" || trim(Variables) || "] reference table " || trim(Reference) || " On Delete " || trim(OnDelete) || " On Update " || trim(OnUpdate) || ") " ; when ("PRIMARY KEY")ConstraintNote=trim(ConstraintNote)||" Primary Key(Variables " || trim(variables) || ") " ; otherwise ConstraintNote=trim(ConstraintNote)||" Type( " || trim(type) || ") " ; end; if last.Column_name then do; output; end; run; /* join the note onto the column description */ proc sql; create table MyColumns2 as select Mycolumns2a.*, ConstraintNotes.ConstraintNote from MyColumns2a left join ConstraintNotes on upcase(ConstraintNotes.column_name)=upcase(Mycolumns2a.name) order by name; quit; /* ***************************************************** */ %DDIInstanceStart(schemaLoc=%NRSTR(file://C:\DDI\DDI30\XMLSchema\instance.xsd)) %IdentificationElement() %CitationElement(I_C_Title=%NRSTR(DDI file from SAS dataset), I_C_SubTitle=%NRSTR( ), I_C_AlternateTitle=%NRSTR( ), I_C_Creator=%NRSTR( ), I_C_Publisher=%NRSTR( ), I_C_Contributor=%NRSTR( ), I_C_PublicationDate=%NRSTR( ), I_C_Language=%NRSTR( ), I_C_InternationalIdentifierType=%NRSTR( ), I_C_InternationalIdentifier=%NRSTR( ), I_C_Copyright=%NRSTR( ), DCE_subject=%NRSTR( ), DCE_description=%NRSTR( ), DCE_date=%NRSTR( ), DCE_type=%NRSTR( ), DCE_format=%NRSTR( ), DCE_identifier=%NRSTR( ), DCE_source=%NRSTR( ), DCE_relation=%NRSTR( ), DCE_coverage=%NRSTR( ), DCE_rights=%NRSTR( ) ); %StudyUnitStart(SU_ID= %NRSTR(StudyUnit_001), SU_Version= %NRSTR(1.0), SU_VersionDate= %NRSTR( ), SU_VersionResponsibility= %NRSTR(IPSR - The University of Kansas), SU_Title= %NRSTR(Sample SAS Dataset for SAS to DDI3 Program), SU_Creator= %NRSTR(Larry Hoyle), SU_Publisher= %NRSTR(Institute for Policy & Social Research; University of Kansas), SU_Contributer= %NRSTR(Other study contributers would be listed here), SU_Abstract= %NRSTR(A sample dataset for moving data from SAS to DDI3 and back), SU_FundingAgencyID= %NRSTR(none), SU_FundingAgencyURN= %NRSTR(none), SU_GrantNumber= %NRSTR(none), SU_Purpose= %NRSTR(These data were generated by a SAS program to test SAS programs exporting data to DDI 3), SU_TopicalKeyword1= %NRSTR(SAS), SU_TopicalKeyword2= %NRSTR(DDI), SU_SpatialCoverageDescription= %NRSTR(The data were generated in Lawrence Kansas but do not apply to this location), SU_GeographyName1= %NRSTR(Kansas), SU_GeographyName2= %NRSTR(Lawrence), SU_StartDate= %NRSTR( ), SU_EndDate= %NRSTR( ), SU_AnalysisUnit= %NRSTR(imaginary person), SU_GeographyNote= %NRSTR(Kansas is a state in the United States. Lawrence is the city in which the University of Kansas is located.), SU_Note= %NRSTR(Columns were chosen to give a variety of data types and formats. The column avocados is essential to an esoteric pun. Not all elements that can occur multiple times do so in this sample. Title, & creator are two examples. A version that allows for those repeating elements will be important for testing a SAS xmlmap to read the data back in to SAS ) ) %ConceptualComponent(C_UniverseDescription= %NRSTR(All imaginary data about people) ) %DataCollectionStart(D_ID= %NRSTR(DataCollection_001), D_StudyNote= %NRSTR(The data are just made up - entered into a datalines statement in a SAS data step), D_GeographyNote= %NRSTR(Data were made up in Blake Hall in Lawrence Kansas), D_TimeNote1= %NRSTR(The dataset was created first), D_TimeNote2= %NRSTR(Metadata were added later), D_Sampling= %NRSTR(No sampling was involved), D_DataSource= %NRSTR(The imagination of Larry), D_StartDate= %NRSTR(2007-08-07 ), D_EndDate= %NRSTR(2007-08-15 ), D_CollectionMode= %NRSTR(A SAS program generated the data table. The metadata structure was copied from the file V3_MarkupTemplate.xml modified as needed in oXygen. The XML was then modified into SAS code.) ); %QuestionSchemeStart(QuestionScheme_ID= %NRSTR(QuestionScheme_001) ) %QuestionItem(QuestionItem_ID= %NRSTR(ID), QuestionItem_Text= %NRSTR(What is the ID for this observation?), QuestionItem_CodeScheme= ); %QuestionItem(QuestionItem_ID= %NRSTR(name), QuestionItem_Text= %NRSTR(What should the name of the respondent be?), QuestionItem_CodeScheme= ); %QuestionItem(QuestionItem_ID= %NRSTR(avocado), QuestionItem_Text= %NRSTR(How many avocados should the respondent have?), QuestionItem_CodeScheme= %NRSTR(Codes_AVOCADONUMBER) ); %QuestionItem(QuestionItem_ID= %NRSTR(sex), QuestionItem_Text= %NRSTR(What is the sex of the respondent?), QuestionItem_CodeScheme= %NRSTR(Codes_SEX) ) ; %QuestionItem(QuestionItem_ID= %NRSTR(percenttime), QuestionItem_Text= %NRSTR(What percent value should we try?), QuestionItem_CodeScheme= ); %QuestionItem(QuestionItem_ID= %NRSTR(fee), QuestionItem_Text= %NRSTR(Enter a value in Euros that will test capuring measurement units?), QuestionItem_CodeScheme= ); %QuestionItem(QuestionItem_ID= %NRSTR(DOB), QuestionItem_Text= %NRSTR(What date variable should we use - how about entering a Date of Birth?), QuestionItem_CodeScheme= ); %QuestionItem(QuestionItem_ID= %NRSTR(TOB), QuestionItem_Text= %NRSTR(What time variable should we use - (how about entering the person%'s Time of Birth?)), QuestionItem_CodeScheme= ); %QuestionItem(QuestionItem_ID= %NRSTR(entryDT), QuestionItem_Text= %NRSTR(What is the DateTime when this value is being generated?), QuestionItem_CodeScheme= ); %QuestionSchemeEnd() %DataCollectionEnd() %LogicalProductStart(L_ID= %NRSTR(LogicalProduct_001) ) /* Category, Code and Variable schemes are all generated from the SAS Dataset - no parameters */ %CategoryScheme() %CodeScheme() %VariableScheme() %LogicalProductEnd() /* NOTE: LogicalProductReference must match ID from LogicalProductStart */ %PhysicalDataProductStart(PD_ID= %NRSTR(PhysicalDataProduct_001), PD_LogicalProductReference= %NRSTR(LogicalProduct_001), PD_GrossRecordStructureID= %NRSTR(GrossRecordStructure_001) ) /* ****** DataItems generated from the SAS dataset ******* */ %DataItems() /* ****** Dataset generated from the SAS dataset ******* */ %Dataset() %PhysicalDataProductEnd() %StudyUnitEnd() %CntloutXML(dataset=myformats); %DDIInstanceEnd() %Mend SAS2DDI; %SAS2DDI(lib=work, dataset=mySASdata, XMLfile=ddi);