/* SAS2DDI3_CR.sas - Output a SAS dataset as a DDI3 Candidate Release XML file */ /* First Draft Version August 17, 2007*/ /* August 22, 2007 14:40 revision */ /* August 30, 2007 16:25 revision */ /* January 15, 2008 13:30 revision - added capurint integrity constraints */ /* Larry Hoyle August 2007 */ /* Metadata that can be extracted from the selected SAS dataset are written to the DDI file automatically */ /* Other metadata can be entered as arguments to the macro "SAS2DDI" at the bottom of the file */ /* This version could be revised to allow a more flexible form of input into the macro variables that define */ /* element content. */ /* When entering text into the %NRSTR() function the following note from the SAS documentation applies: */ /* "Note: If an unmatched single or double quotation mark or a left or right parenthesis is used */ /* with %STR or %NRSTR, */ /* these characters must be preceded by a percent sign (%)." */ /* With globalization of the macro variables this could easily be turned into a SAS Application Dispatcher service */ /* If you want to validate the XML output by this program you should revise the "schemaLoc" */ /* macro variable to point to the instance.xsd file you have downloaded (along with the other schema files) */ /* While the DDI 3 schema allows for XHMTML content in some elements, in this program all */ /* element content is escaped by the macro Xescape. A "<", for example, is rendered as "<" */ /* This will preserve the content but will inactivate any scripts in the XHTML */ /* Generate the TEST DATASET */ /* SAS2DDI3_data.sas - generate test data for SAS to DDI3 conversion program */ /* Larry Hoyle August 2007 */ proc format; invalue sexin (just upcase) 'M' = 1 'MALE' = 1 'F' = 2 'FEMALE' = 2; ; value sexAll 0 = 'Young Male' 1 = 'Adult Male' 2 = 'Young Female' 3 = 'Adult Female' ; value sex 0 = 'Male' 1 = 'Male' 2 = 'Female' 3 = 'Female' ; /* format BMI is not used and is here to be ignored later */ value BMI low-<18.5 = "Underweight" 18.5-24.9 = "Normal weight" 25-29.9 = "Overweight" 30-high = "Obesity"; value $stext 'm','M','MALE','Male' = 'Boy' 'f','F','FEMALE','Female' = 'Girl'; /* format to test a range of numbers */ value avocadoNumber low-<0 = 'avocados owed' 1 = 'lonley avocado' 1<-6.02214149e23 = 'too few avocados' 6.02214149e23-6.02214209e23 = 'guaca mole' 6.02214209e23<-high = 'a party'; run; data mySASdata(label='Test Data for SAS to DDI 3 program'); length ID 8 name $ 30 avocado 8 sex 8 percentTime 8 DOB 8 TOB 8 entryDT 8; label ID = 'Identification Number'; label name = 'Respondant''s Name'; label avocado = 'Number of avacados'; label sex = 'Respondant''s Gender'; label percentTime = 'Percent of time counting Avacados'; label fee = 'Fee in Euros'; label entryDT = 'Date data entered'; label DOB = 'Date of birth'; label TOB = 'Time of Birth'; input id @4 sex sexin. @12 avocado percentTime fee DOB :date. TOB :time. name $ 49-78 ; entryDT=datetime(); format avocado avocadoNumber.; format sex sex.; format percentTime percent8.1; format fee EUROX10.2; format DOB IS8601DA.; format TOB IS8601TM.; format entryDT IS8601DT.; datalines; 1 M 1 .1 11000 29feb52 11:49 Joe Schmo 2 M 6.021e23 .2 10000 01jan72 14:01 Bill Hill 3 F 6.02214139e23 .3 9000 08jun85 05:25 Donna O'Fauna 4 M 6.02214149e23 .4 8000 25dec64 01:23 Rob "Bob" Cobb 5 m 6.02214159e23 .5 7000 15mar75 15:15 Tom Applebaum 6 f 6.02214179e23 .6 6000 05jun07 08:09 Louise Mac&Cheese 7 m 6.02214209e23 .7 5000 11nov11 11:11 Jack Black 8 F 6.02214219e23 .8 4000 01jan72 14:04 Jill Hill 9 m -3 .9 3000 01apr99 03:03 Gno Avocado 10 . . 2000 01jan60 01:01 ; run; /* Table of names for foreign key constraint */ data RealPeople; input RealName $30. ; datalines; Joe Schmo Bill Hill Donna O'Fauna Rob "Bob" Cobb Tom Applebaum Louise Mac&Cheese Jack Black Jill Hill Gno Avocado Poppy LaFleur Pickett Wilson Olive Hoyle ; run; /* must be a primary key, but not necessarily have the same name */ proc sql; alter table work.RealPeople add constraint prim_key Primary key(RealName); quit; /* add some integrity constraints */ /* both general (range and list) and referential */ proc sql; alter table work.mysasdata add constraint prim_key Primary key(id) add constraint DOB_present Not Null(DOB) add constraint id_GT_0 check(id GT 0) add constraint sex_MF check(sex in (.,1,2)) add constraint avocado_unique Unique(avocado) add constraint name_fkey foreign key(name) references work.RealPeople ; quit; proc sql; describe table work.mysasdata; quit; /* proc print data=mySASdata; run; */ /* **************************************************************************** */ /* ************************** end of generate Test dataset section ********* */ /* **************************************************************************** */ %Global DDILinesize; %Let DDILinesize=1024; /* maximum line length for the XML file (longer lines will fold) */ %Global maxParamLength; %Let maxParamLength=10000; /* maximum length of one of the macro variable arguments */ %Global IdentificationElementID; %Let IdentificationElementID=%NRSTR(testDDIFromSAS); /* V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V */ /* NOTE if this is revised as a Web service, the macro variables lib and dataset should not come from a Web form */ /* ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ */ filename ddi 'C:\DDRIVE\data\DDI\DDI30\SAS\SAS2DDI3\CR_myDDIfromSASwithConstraints.xml'; /* Some formats indicate measurement units and perhaps role of the variable */ /* These are some of those formats */ /* fmtname is the root name of the format (without the w.d) */ /* represents is the measurement unit that the format represents */ /* formatDescription is the description of the format from the SAS documentation */ data formatDocu; length fmtname $ 32 represents $ 32 formatDescription $ 200; input fmtname represents &:$char32. / formatDescription &:$char200./; datalines; DATE Date Writes Date values in the form ddmmmyy or ddmmmyyyy DATEAMP DateTime Writes DateTime values in the form ddmmmyy:hh:mm:ss.ss with AM or PM DATETIME DateTime Writes DateTime values in the form ddmmmyy:hh:mm:ss.ss DAY Date Writes Date values as the day of the month DDMMYY Date Writes Date values in the form ddmm{yy} yy or dd/mm/{yy}yy , where a forward slash is the separator and the year appears as either 2 or 4 digits DDMMYYB Date Writes Date values in the form ddmm{yy} yy or ddXmm {yy}yy , the year appears as either 2 or 4 digits DDMMYYC Date Writes Date values in the form ddmm{yy} yy or ddXmm:{yy}yy , the year appears as either 2 or 4 digits DDMMYYD Date Writes Date values in the form ddmm{yy} yy or ddXmm-{yy}yy , the year appears as either 2 or 4 digits DDMMYYN Date Writes Date values in the form ddmm{yy} yy or ddXmm{yy}yy , the year appears as either 2 or 4 digits DDMMYYP Date Writes Date values in the form ddmm{yy} yy or ddXmm.{yy}yy , the year appears as either 2 or 4 digits DDMMYYS Date Writes Date values in the form ddmm{yy} yy or ddXmm/{yy}yy , the year appears as either 2 or 4 digits DOWNAME Date Writes Date values as the name of the day of the week DTDATE DateTime Expects a DateTime value as input and writes date values in the form ddmmmyy or ddmmmyyyy DTMONYY Date Writes the Date part of a datetime value as the month and year in the form mmmyy or mmmyyyy DTWKDATX Date Writes the Date part of a datetime value as the day of the week and the date in the form day-of-week, dd month-name yy (or yyyy) DTYEAR Date Writes the Date part of a datetime value as the year in the form yy or yyyy DTYYQC Date Writes the Date part of a datetime value as the year and the quarter and separates them with a colon (:) EURDFDD Date Writes international Date values in the form dd.mm.yy or dd.mm.yyyy EURDFDE Date Writes international Date values in the form ddmmmyy or ddmmmyyyy EURDFDN Date Writes international Date values as the day of the week EURDFDT Date Writes international Datetime values in the form ddmmmyy:hh:mm:ss.ss or ddmmmyyyy hh:mm:ss.ss EURDFDWN Date Writes international Date values as the name of the day EURDFMN Date Writes international Date values as the name of the month EURDFMY Date Writes international Date values in the form mmmyy or mmmyyyy EURDFWDX Date Writes international Date values as the name of the month, the day, and the year in the form dd month-name yy (or yyyy ) EURDFWKX Date Writes international Date values as the name of the day and date in the form day-of-week, dd month-name yy (or yyyy) HDATE Date Writes Date values in the form yyyy mmmmm dd where dd is the day-of-the-month, mmmmm represents the month's name in Hebrew, and yyyy is the year HEBDATE Date Writes Date values according to the Jewish calendar HHMM Time Writes Time values as hours and minutes in the form hh:mm HOUR Time Writes Time values as hours and decimal fractions of hours IS8601DA Date Writes data values in the extended format YYYY-MM-DD IS8601DN DateTime Writes datetime values with only a date portion in the extended format YYYY-MM-DD IS8601DT DateTime Writes datetime values in the extended format YYYY-MM-DDThh:mm:ss[.fffff] IS8601DZ DateTime Writes datetime values with a time zone in the extended format YYYY-MM-DDThh:mm:ss[.fffff][Z]|[[+|-]hh:mm] IS8601LZ Time Writes time values with a time zone in the extended format hh:mm:ss[.fffff][Z][+|-]hh:mm] IS8601TM Time Writes time values in the extended format hh:mm:ss[.fffff] IS8601TZ Time Writes time values with a time zone in the extended format hh:mm:ss[.fffff][Z][+|-]hh:mm] JULDAY Date Writes Date values as the Julian day of the year JULIAN Date Writes Date values as Julian dates in the form yyddd or yyyyddd MINGUO Date Writes Date values as Taiwanese dates in the form yyymmdd MMDDYY Date Writes Date values in the form mmdd{yy} yy or mm/dd/{yy}yy , where a forward slash is the separator and the year appears as either 2 or 4 digits MMDDYYC Date Writes Date values in the form mmdd{yy} yy or mmXdd:{yy}yy , the year appears as either 2 or 4 digits MMDDYYD Date Writes Date values in the form mmdd{yy} yy or mmXdd-{yy}yy , the year appears as either 2 or 4 digits MMDDYYN Date Writes Date values in the form mmdd{yy} yy or mmXdd{yy}yy , the year appears as either 2 or 4 digits MMDDYYP Date Writes Date values in the form mmdd{yy} yy or mmXdd.{yy}yy , the year appears as either 2 or 4 digits MMDDYYS Date Writes Date values in the form mmdd{yy} yy or mmXdd/{yy}yy , the year appears as either 2 or 4 digits MMSS Time Writes Time values as the number of minutes and seconds since midnight MMYY Date Writes Date values in the form mmM{yy} yy, where M is the separator and the year appears as either 2 or 4 digits MMYYx Date Writes Date values in the form mm{yy} yy or mmX{yy}yy, where X represents a specified separator and the year appears as either 2 or 4 digits MONNAME Date Writes Date values as the name of the month MONTH Date Writes Date values as the month of the year MONYY Date Writes Date values as the month and the year in the form mmmyy or mmmyyyy NENGO Date Writes Date values as Japanese dates in the form e.yymmdd NLDATE Date Converts a SAS Date value to the date value of the specified locale and then writes the value in the format of date NLDATEMN Date Converts a SAS Date value to the date value of the specified locale and then writes the date value in the format of name of month NLDATEW Date Converts a SAS Date value to the date value of the specified locale, and then writes the date value in the format of the date and the day of week NLDATEWN Date Converts the SAS Date value to the date value of the specified locale and then writes the date value in the format of the name of day of week NLDATM DateTime Converts a SAS DateTime value to the datetime value of the specified locale and then writes the value in the format of datetime NLDATMAP DateTime Converts a SAS DateTime value to the datetime value of the specified locale and then writes the value in the format of datetime with a.m. or p.m. NLDATMTM Time Converts the Time portion of a SAS datetime value to the time-of-day value of the specified locale and then writes the value in the format of time of day NLDATMW Date Converts a SAS Date value to a datetime value of the specified locale and then writes the value in the format of day of week and datetime NLTIMAP Time Converts a SAS Time value to the time value of a specified locale and then writes the value in the format of a time value with a.m. or p.m. NLTIME Time Converts a SAS Time value to the time value of the specified locale and then writes the value in the format of time PDJULG Date Writes packed Julian Date values in the hexadecimal format yyyydddF for IBM PDJULI Date Writes packed Julian Date values in the hexadecimal format ccyydddF for IBM QTR Date Writes Date values as the quarter of the year QTRR Date Writes Date values as the quarter of the year in Roman numerals TIME Time Writes Time values as hours, minutes, and seconds in the form hh:mm:ss.ss TIMEAMPM Time Writes Time values as hours, minutes, and seconds in the form hh:mm:ss.ss with AM or PM TOD Time Writes the Time portion of datetime values in the form hh:mm:ss.ss WEEKDATE Date Writes Date values as the day of the week and the date in the form day-of-week, month-name dd, yy (or yyyy) WEEKDATX Date Writes Date values as the day of the week and date in the form day-of-week, dd month-name yy (or yyyy) WEEKDAY Date Writes Date values as the day of the week WEEKU Date Writes a week number in decimal format by using the U algorithm WEEKV Date Writes a week number in decimal format by using the V algorithm WEEKW Week Reads the format of the number-of-week value within the year and returns a SAS-date value using the W algorithm WORDDATE Date Writes Date values as the name of the month, the day, and the year in the form month-name dd, yyyy WORDDATX Date Writes Date values as the day, the name of the month, and the year in the form dd month-name yyyy YEAR Date Writes Date values as the year YYMM Date Writes Date values in the form {yy}yyM mm, where M is the separator and the year appears as either 2 or 4 digits YYMM Date Writes Date values in the form {yy}yymm or {yy}yyXmm, where X represents a specified separator and the year appears as either 2 or 4 digits YYMMDD Date Writes Date values in the form {yy}yymmdd or {yy}yy-mm- dd, where a dash is the separator and the year appears as either 2 or 4 digits YYMMDDx Date Writes Date values in the form {yy}yymmdd or {yy}yyXmmXdd, where X represents a specified separator and the year appears as either 2 or 4 digits YYMON Date Writes Date values in the form yymmm or yyyymmm YYQ Date Writes Date values in the form {yy}yyQ q, where Q is the separator, the year appears as either 2 or 4 digits, and q is the quarter of the year YYQx Date Writes Date values in the form {yy}yyq or {yy}yyXq, where X represents a specified separator, the year appears as either 2 or 4 digits, and q is the quarter of the year YYQR Date Writes Date values in the form {yy}yyQ qr, where Q is the separator, the year appears as either 2 or 4 digits, and qr is the quarter of the year expressed in roman numerals YYQRx Date Writes Date values in the form {yy}yy qr or {yy}yyXqr, where X represents a specified separator, the year appears as either 2 or 4 digits, and qr is the quarter of the year expressed in Roman numerals DOLLAR Currency-dollars Writes numeric values with a leading dollar sign, a comma that separates every three digits, and a period that separates the decimal fraction DOLLARX Currency-dollars Writes numeric values with a leading dollar sign, a period that separates every three digits, and a comma that separates the decimal fraction EURO Currency-euros Writes numeric values with a leading euro symbol (E), a comma that separates every three digits, and a period that separates the decimal fraction EUROX Currency-euros Writes numeric values with a leading euro symbol (E), a period that separates every three digits, and a comma that separates the decimal fraction NLMNY Currency Writes the monetary format of the local expression in the specified locale using local currency NLMNYI Currency Writes the monetary format of the international expression in the specified locale NLPCT Proportion Writes percentage data of the local expression in the specified locale NLPCTI Proportion Writes percentage data of the international expression in the specified locale PERCENT Proportion Writes numeric values as percentages PERCENTN Proportion Produces percentages, using a minus sign for negative values. SSN Social Security Number Writes Social Security numbers ; run; /* capture the current user formats in myformats */ proc format cntlout=myformats; run; /* escape the five characters that cannot appear in PCDATA */ %Macro Xescape(var=xxxx); %do; prxchange('s/\"/"/',1, prxchange('s/\''/'/',1, prxchange('s/\>/>/',1, prxchange('s/\"; put ""; run; %mend DDIInstanceStart; %Macro IdentificationElement(); %put Start Identification Element; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; %GetParam(macroVar=IdentificationElementID); put " " param +(-1) ""; put " "; run; %mend IdentificationElement; /* Citation element */ /* Duplicate ns1:DDIInstance/r:Citation subelements that match */ /* ns1:DDIInstance/r:Citation/dce:DCElements subelements into */ /* the DCElements (Dublin Core). Explicitly set the rest of the */ /* Dublin Core elements */ %Macro CitationElement(I_C_Title=DDI file from SAS dataset, I_C_SubTitle=, I_C_AlternateTitle=, I_C_Creator=, I_C_Publisher=, I_C_Contributor=, I_C_PublicationDate=, I_C_Language=, I_C_InternationalIdentifierType=, I_C_InternationalIdentifier=, I_C_Copyright=, DCE_subject=, DCE_description=, DCE_date=, DCE_type=, DCE_format=, DCE_identifier=, DCE_source=, DCE_relation=, DCE_coverage=, DCE_rights= ); %put Citation Element; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; /* title is required */ %GetParam(macroVar=I_C_Title) put " " param +(-1) " "; %GetParam(macroVar=I_C_SubTitle) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_AlternateTitle) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Creator) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Publisher) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Contributor) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_PublicationDate) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Language) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_InternationalIdentifierType) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Copyright) if param ne " " then put " " param +(-1) " "; /* Optional Dublin Core elements */ put " "; %GetParam(macroVar=I_C_Title) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Creator) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_subject) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_description) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Publisher) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Contributor) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_date) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_type) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_format) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_identifier) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_source) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=I_C_Language) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_relation) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_coverage) if param ne " " then put " " param +(-1) " "; %GetParam(macroVar=DCE_rights) if param ne " " then put " " param +(-1) " "; put " "; put " "; put " "; run; %mend CitationElement; %Macro StudyUnitStart(SU_ID=StudyUnit_001, SU_Version=1.0, SU_VersionDate=, SU_VersionResponsibility=Responsible Party, SU_Title=Study Title, SU_Creator=Study Creator, SU_Publisher=Study Publisher, SU_Contributer=Study Contributer, SU_Abstract=Abstract for the study appears here, SU_FundingAgencyID=ID of Funding Agency could go here, SU_FundingAgencyURN=URN of funding agency could go here, SU_GrantNumber=Grant number could go here, SU_Purpose=The purpose of the data goes here, SU_TopicalKeyword1=Keyword1, SU_TopicalKeyword2=Keyword2, SU_SpatialCoverageDescription=A description of the spatial coverage goes here, SU_GeographyName1=Name of first geography, SU_GeographyName2=Name of second geography, SU_StartDate=, SU_EndDate=, SU_AnalysisUnit=Unit of analysis described here, SU_GeographyNote=A note about the geography goes here, SU_Note=A general note about the Study Unit goes here); %put Start Study Unit; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; %GetParam(macroVar=SU_ID) put " " param +(-1) ""; %GetParam(macroVar=SU_Version) put " " param +(-1) ""; %GetParam(macroVar=SU_VersionDate) if param ne " " then put " " param +(-1) ""; %GetParam(macroVar=SU_VersionResponsibility) put " " param +(-1) ""; put " "; put " "; %GetParam(macroVar=SU_Title) put " " param +(-1) ""; %GetParam(macroVar=SU_Creator) put " " param +(-1) ""; %GetParam(macroVar=SU_Publisher) put " " param +(-1) ""; %GetParam(macroVar=SU_Contributer) put " " param +(-1) ""; put " "; put " "; put " Abstract_001"; %GetParam(macroVar=SU_Abstract) put " " param +(-1) ""; put " "; put " "; put " Universe_001"; put " "; put " "; put " "; %GetParam(macroVar=SU_FundingAgencyID) put " "; put " " param +(-1) ""; %GetParam(macroVar=SU_FundingAgencyURN) put " " param +(-1) ""; put " "; put " "; %GetParam(macroVar=SU_GrantNumber) put " " param +(-1) ""; put " "; put " "; put " Purpose_001"; %GetParam(macroVar=SU_Purpose) put " " param +(-1) ""; put " "; put " "; put " "; put " "; put " TopicalCoverage_001"; put " "; %GetParam(macroVar=SU_TopicalKeyword1) put " " param +(-1) ""; %GetParam(macroVar=SU_TopicalKeyword2) put " " param +(-1) ""; put " "; put " "; put " "; put " SpatialCoverage_001"; put " "; %GetParam(macroVar=SU_SpatialCoverageDescription) put " " param +(-1) ""; put " "; put " "; put " "; put " Geography_001"; put " "; put " "; %GetParam(macroVar=SU_GeographyName1) put " " param +(-1) ""; put " "; put " "; put " "; put " "; put " Geography_002"; put " "; put " "; %GetParam(macroVar=SU_GeographyName2) put " " param +(-1) ""; put " "; put " "; put " "; put " "; put " Geography_001"; put " "; put " "; put " Geography_002"; put " "; put " "; put " "; put " "; put " TemporalCoverage_001"; put " "; /* must have a non blank start date to have a Reference Date */ /* end date may be missing or not if start date is present */ /* an end date with a missing start date is invalid */ %GetParam(macroVar=SU_StartDate) if param ne " " then do; put " "; put " " param +(-1) ""; %GetParam(macroVar=SU_EndDate) put " " param +(-1) ""; put " "; end; /* SU_StartDate non blank */ put " "; put " "; %GetParam(macroVar=SU_AnalysisUnit) put " " param +(-1) ""; put " "; put " "; put " Geography_001"; put " "; put " "; put " Geography_002"; put " "; %GetParam(macroVar=SU_GeographyNote) put " " param +(-1) ""; put " "; put " "; put " "; put " StudyUnit_001"; put " "; %GetParam(macroVar=SU_Note) put " " param +(-1) "" ; put " "; put " "; run; %mend StudyUnitStart; %Macro ConceptualComponent(C_UniverseDescription=A description of the Study Universe goes here); /* ****** ConceptualComponent ******* */ %put Start Conceptual Component; data concept; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; set MyColumns2 end=last; by name; if _n_=1 then do; put " "; put " ConceptualComponent_001"; put " "; put " ConceptScheme_001"; end; /* create a concept for each user format */ /* the best wy can do for the concept is to use the variable label if present */ put " "; put " " xName +(-1) ""; if xLabel =" " then put " Concept for " xName +(-1) ""; else put " " xLabel +(-1) ""; put " "; if last then do; put " "; put " "; put " UniverseScheme_001"; put " "; put " Universe_001 "; %GetParam(macroVar=C_UniverseDescription) put " Universe_001: " param +(-1) ""; put " "; put " "; put " "; put " "; end; run; %mend ConceptualComponent; %Macro DataCollectionStart(D_ID=DataCollection_001, D_StudyNote=A note about the data collection at the Study Unit level goes here, D_GeographyNote=A note about the data collection geography goes here, D_TimeNote1=A note about the data collection time goes here, D_TimeNote2=Another note about the data collection time goes here, D_Sampling=A note about sampling goes here, D_DataSource=A note about the data source goes here, D_StartDate=, D_EndDate=, D_CollectionMode=A note about the mode of collection goes here ); %put Start Data Collection; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; put " "; %GetParam(macroVar=D_ID) put " " param +(-1) ""; put " "; put " "; put " "; put " StudyUnit_001"; put " "; %GetParam(macroVar=D_StudyNote) put " " param +(-1) ""; put " "; put " "; put " "; put " StudyUnit_001"; put " "; put " "; put " Geography_001"; put " "; put " "; put " Geography_002"; put " "; %GetParam(macroVar=D_GeographyNote) put " " param +(-1) ""; put " "; put " "; put " "; put " "; put " TimeMethod_001"; put " "; %GetParam(macroVar=D_TimeNote1) put " " param +(-1) ""; put " "; put " "; put " "; put " TimeMethod_002"; put " "; %GetParam(macroVar=D_TimeNote2) put " " param +(-1) ""; put " "; put " "; put " "; put " Sampling_001"; put " "; %GetParam(macroVar=D_Sampling) put " " param +(-1) ""; put " "; put " "; put " "; put " "; %GetParam(macroVar=D_DataSource) put " " param +(-1) ""; put " "; /* must have a non blank start date to have a DataCollectionFrequency */ /* end date may be missing or not if start date is present */ /* an end date with a missing start date is invalid */ %GetParam(macroVar=D_StartDate) if param ne " " then do; put " "; put " " param +(-1) ""; %GetParam(macroVar=D_EndDate) put " " param +(-1) ""; put " "; end; /* D_StartDate non blank */ put " "; put " "; put " CollectionMode_001"; put " "; %GetParam(macroVar=D_CollectionMode) put " " param +(-1) ""; put " "; put " "; run; %mend DataCollectionStart; %Macro QuestionSchemeStart(QuestionScheme_ID=QuestionScheme_001); %put Start Question Scheme; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; put " "; %GetParam(macroVar=QuestionScheme_ID) put " " param +(-1) ""; put " "; run; %mend QuestionSchemeStart; %Macro QuestionItem(QuestionItem_ID=Question, QuestionItem_Text=What is the question?, QuestionItem_CodeScheme=Answer42); %put QuestionItem; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; put " "; %GetParam(macroVar=QuestionItem_ID) put " " param +(-1) ""; put " "; put " "; put " "; %GetParam(macroVar=QuestionItem_Text) put " " param +(-1) ""; put " "; put " "; put " "; put " "; %GetParam(macroVar=QuestionItem_CodeScheme) put " " param +(-1) ""; put " "; put " "; put " "; run; %mend QuestionItem; %Macro QuestionSchemeEnd(); %put End Question Scheme; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; run; %mend QuestionSchemeEnd; %Macro DataCollectionEnd(); %put End Data Collection; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; put " "; put " "; put " Cleaning consisted in looking through the entered data and ensuring proper XML format"; put " "; put " "; put " "; put " "; put " Weighting_001"; put " "; put " "; put " No weighting is necessary"; put " "; put " "; put " "; put " 100%"; put " no sampling error"; put " "; put " "; put " "; put " "; run; %mend DataCollectionEnd; %Macro LogicalProductStart(L_ID=LogicalProduct_001); %put Start Logical Product; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; %GetParam(macroVar=L_ID) put " " param +(-1) ""; run; %mend LogicalProductStart; /* ****** category Scheme generated from the SAS dataset ******* */ %Macro CategoryScheme(); %put Catagory Scheme; data uniqueLabels2; set uniquelabels; by fmtname label; file ddi mod Linesize=&DDILinesize; if first.fmtname then do; put " "; put " Cats_" fmtname +(-1) ""; end; CatID=trim(fmtname)||"_"||left(put(_n_,8.)); put " "; put " " CatID +(-1) ""; put " " label +(-1) ""; put " "; if last.fmtname then do; put " "; put " "; end; run; %Mend CategoryScheme; /* ****** code Scheme generated from the SAS dataset ******* */ %Macro CodeScheme(); proc sql; create table userFormats2 as select userFormats.*, CatID from UniqueLabels2 as L,userFormats as F where L.fmtname=F.fmtname and L.label=F.label order by fmtname,start; quit; %put Code Scheme; data codes; length param $ &maxParamLength; set userFormats2; by fmtname start; file ddi mod Linesize=&DDILinesize; if first.fmtname then do; CodeSchemeID="Codes_"||trim(fmtname); put " "; put " " CodeSchemeID +(-1) ""; put " Cats_" fmtName +(-1) ""; end; if start=end then do; CodeID="C_"||trim(fmtname)||"_"||left(put(_n_,8.)); put " "; put " "; put " " CatID +(-1) ""; put " "; put " " start +(-1) ""; put " "; end; if last.fmtname then do; put " "; put " "; end; run; %Mend CodeScheme; /* ****** variable Scheme generated from the SAS dataset ******* */ %Macro VariableScheme(); %put Variable Scheme; data vars; length param $ &maxParamLength; set myColumns2 end=last; by name; file ddi mod Linesize=&DDILinesize; if _n_=1 then do; put " "; put " VariableScheme_001"; end; put " "; put " " xName +(-1) "" xName +(-1) ""; put " " xLabel +(-1) ""; /* SAS specific information goes here */ put " " @; put "SAS varnum: " varnum @; if format ne " " then put " SAS format: " format @; if informat ne " " then put " SAS informat " informat @; if idxusage ne " " then put " SAS idxusage " idxusage @; if sortedby ne 0 then put " SAS sortedby " sortedby @; if notnull ne "no" then put " SAS notnull " notnull @; if precision ne . then put " SAS precision " precision @; if scale ne . then put " SAS scale " scale @; put " SAS transcode: " transcode @; if ConstraintNote ne " " then put ConstraintNote @; put ""; put " " xName +(-1) ""; /* THIS SECTION NEEDS to be expanded to capture any other formats */ /* that indicate units of measurement */ put " "; if formatDescription ne " " then put " SAS format indicates: " formatDescription +(-1) ""; if type='num' then select (represents); when ("Date") put " "; when ("DateTime") put " "; when ("Time") put " "; otherwise put " "; end;/* type="num" */ else put " "; /* code scheme reference only for user formats */ if uFmtName ne " " then do; put " "; put " "; put " "; put " Codes_" xName +(-1) ""; put " "; put " "; put " "; end; put " "; put " "; put " "; if last then do; put " "; put " "; end; run; %Mend VariableScheme; %Macro LogicalProductEnd(); %put End Logical Product; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; run; %mend LogicalProductEnd; %Macro PhysicalDataProductStart(PD_ID=PhysicalDataProduct_001, PD_LogicalProductReference=LogicalProduct_001, PD_GrossRecordStructureID=GrossRecordStructure_001); %put Start Physical Product; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; %GetParam(macroVar=PD_ID) put " " param +(-1) ""; %GetParam(macroVar=PD_LogicalProductReference) put " " param +(-1) ""; put " "; put " "; %GetParam(macroVar=PD_GrossRecordStructureID) put " " param +(-1) ""; put " "; put " ??????"; put " "; put " "; put " EmbeddedData"; put " Data are embedded in this DDI file"; put " "; run; %mend PhysicalDataProductStart; /* ****** DataItems generated from the SAS dataset ******* */ %Macro DataItems(); proc sort data=MyColumns; by varnum; run; %put Data Items; data _null_; length param $ &maxParamLength; set myColumns end=last; by varnum; file ddi mod Linesize=&DDILinesize; put " "; put " " xName +(-1) ""; put " "; if last then put " "; run; %Mend DataItems; /* ****** dataset generated from the SAS Dataset ******* */ %Macro Dataset(); %put Dataset; data ds; set &lib..&dataset end=_last; file ddi mod Linesize=&DDILinesize; array nums{*} _numeric_; array chars{*} _character_; length _MyVarname $32; length _myValue $2000; if _n_=1 then do; put " "; put " Dataset_001"; put " Dataset_001"; put " VariableScheme_001"; end; /* for each row of data */ put " "; /* this approach requires outputting */ /* all of the numeric variables together */ /* and all of the character variables together */ /* Original column order is preserved in the VariableScheme */ /* Proc Transpose approach? */ do i=1 to dim(nums); /* get name of numeric variable */ call vname(nums{i},_MyVarname); put " "; put " " _MyVarname +(-1) ""; /* */ _myFMT=prxchange('s/\d*\.\d*//',1,vformat(nums{i})) ; /* numeric values - empty element if missing */ if nums{i} <= .Z then put " "; /* format date/time vars appropriately */ else do; select (_myFMT); when ("IS8601DA","DATE") put " " nums{i} IS8601DA. ""; when ("IS8601DT","DATETIME") put " " nums{i} IS8601DT. ""; when ("IS8601TM","TIME") put " " nums{i} IS8601TM. ""; otherwise do; /* left justify */ _MyValue=left(put(nums{i}, BEST18.)); put " " _MyValue ""; end; end; /* select */ end; /* if nums{i} <= .Z */ put " "; end; do i=1 to dim(chars); /* get name of character variable */ call vname(chars{i},_MyVarname); _MyValue=%Xescape(var=chars{i}); put " "; put " " _MyVarname +(-1) ""; put " " _MyValue ""; put " "; end; put " "; if _last then do; put " "; end; run; %Mend DataSet; %Macro PhysicalDataProductEnd(); %put End Physical Product; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; put " "; run; %mend PhysicalDataProductEnd; %Macro StudyUnitEnd(); %put End Study Unit; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; put " "; run; %mend StudyUnitEnd; %Macro DDIInstanceEnd(); %put End DDI Instance; data _null_; length param $ &maxParamLength; file ddi mod Linesize=&DDILinesize; put " "; put " "; put " "; %GetParam(macroVar=IdentificationElementID) put " " param +(-1) ""; put " "; put " The original version of this DDI file was generated from a SAS dataset" ; put " "; put " "; put ""; run; %mend DDIInstanceEnd; /* ******************************************************************************* */ /* ********* SAS2DDI Set options for the DDI File Here ************** */ /* ******************************************************************************* */ %Macro SAS2DDI(lib=work, dataset=mySASdata, XMLfile=ddi); /* lib- the library containing the dataset to output */ /* dataset- the dataset to output */ /* XMLfile- a FILEREF to the DDI XML file to be written */ /* ^^^^^^^^^^^^^ TO DO: clean the arguments ^^^^^^^^^^^^^^^^^ */ /* ^^^^^^^^^^^^^ in case this is later used in a Web app ^^^^^^^^^^^^^^^^^ */ proc sql; /* extract the column metadata and escape characters where necessary */ create table myColumnsA as select *, prxchange('s/\d*\.\d*//',1,format) as fmtName, %Xescape(var=name) as xname, %Xescape(var=label) as xlabel from dictionary.columns where libname=upcase("&lib") and memname=upcase("&dataset") order by name; create table myColumns as select MyColumnsA.*, represents, %Xescape(var=formatDescription) as formatDescription from MyColumnsA left join formatDocu on MycolumnsA.fmtname=formatDocu.fmtname order by name; /* extract the format metadata for our columns */ proc sql; create table userFormats as select myFormats.* from myformats,myColumns where myColumns.fmtName=myFormats.fmtname and myFormats.type in ("N","C"); /* make a unique list of the labels from the formats useful for concepts */ create table uniqueLabels as select distinct fmtname, label from userformats order by fmtname, label; create table MyColumns2a as select MyColumns.*, uFmtName from MyColumns left join (select distinct fmtname as uFmtName label="User Format Name" from userformats) as uf on MyColumns.fmtname=uf.ufmtname order by name; quit; /* ***************************************************** */ /* Capture metadata from integrity constraints */ /* unlike dictionary tables, proc contents displays the */ /* referenced table for a foreign key */ /* capture that reference along with other constraints */ /* ***************************************************** */ ods output Contents.DataSet.IntegrityConstraints=work.IC1; proc contents data=&lib..&dataset; run; ods output close; /* for testing ^^^^^^^^^^^^^^^^^^^^^^^^^^ */ proc sql; create table d as select dict.* from DICTIONARY.CONSTRAINT_COLUMN_USAGE as dict where dict.table_catalog=upcase(symget("lib")) and dict.table_name=upcase(symget("dataset")) order by Column_name; quit; proc sql; create table ColumnConstraints as select dict.Column_name, ic1.constraint, ic1.Type, ic1.Variables, ic1.WhereClause, ic1.Reference, ic1.OnDelete, ic1.OnUpdate from ic1,DICTIONARY.CONSTRAINT_COLUMN_USAGE as dict where dict.table_catalog=upcase(symget("lib")) and dict.table_name=upcase(symget("dataset")) and ic1.constraint=dict.constraint_name order by Column_name, type; quit; /* concatenate all of the individual constraints into a single note for each variable */ data constraintNotes; set ColumnConstraints; by Column_name; length ConstraintNote $ 200; retain ConstraintNote; keep Column_Name ConstraintNote; if first.Column_name then do; ConstraintNote="SAS Integrity Constraints: "; end; select (upcase(type)); when ("NOT NULL")ConstraintNote=trim(ConstraintNote)||" Not Null (Variables " || trim(Variables) || ") " ; when ("UNIQUE")ConstraintNote=trim(ConstraintNote)||" Unique (Variables " || trim(Variables) || ") " ; when ("CHECK")ConstraintNote=trim(ConstraintNote)||" Check(Where " || trim(WhereClause) || ") " ; when ("FOREIGN KEY")ConstraintNote=trim(ConstraintNote)||" Foreign Key(Variables [" || trim(Variables) || "] reference table " || trim(Reference) || " On Delete " || trim(OnDelete) || " On Update " || trim(OnUpdate) || ") " ; when ("PRIMARY KEY")ConstraintNote=trim(ConstraintNote)||" Primary Key(Variables " || trim(variables) || ") " ; otherwise ConstraintNote=trim(ConstraintNote)||" Type( " || trim(type) || ") " ; end; if last.Column_name then do; output; end; run; /* join the note onto the column description */ proc sql; create table MyColumns2 as select Mycolumns2a.*, ConstraintNotes.ConstraintNote from MyColumns2a left join ConstraintNotes on upcase(ConstraintNotes.column_name)=upcase(Mycolumns2a.name) order by name; quit; /* ***************************************************** */ %DDIInstanceStart(schemaLoc=%NRSTR(file:/C:/DDRIVE/data/DDI/DDI30/DDI_3.0_Part_IVB_XML_Schemas_cr/3.0/instance.xsd)) %IdentificationElement() %CitationElement(I_C_Title=%NRSTR(DDI file from SAS dataset), I_C_SubTitle=%NRSTR( ), I_C_AlternateTitle=%NRSTR( ), I_C_Creator=%NRSTR( ), I_C_Publisher=%NRSTR( ), I_C_Contributor=%NRSTR( ), I_C_PublicationDate=%NRSTR( ), I_C_Language=%NRSTR( ), I_C_InternationalIdentifierType=%NRSTR( ), I_C_InternationalIdentifier=%NRSTR( ), I_C_Copyright=%NRSTR( ), DCE_subject=%NRSTR( ), DCE_description=%NRSTR( ), DCE_date=%NRSTR( ), DCE_type=%NRSTR( ), DCE_format=%NRSTR( ), DCE_identifier=%NRSTR( ), DCE_source=%NRSTR( ), DCE_relation=%NRSTR( ), DCE_coverage=%NRSTR( ), DCE_rights=%NRSTR( ) ); %StudyUnitStart(SU_ID= %NRSTR(StudyUnit_001), SU_Version= %NRSTR(1.0), SU_VersionDate= %NRSTR( ), SU_VersionResponsibility= %NRSTR(IPSR - The University of Kansas), SU_Title= %NRSTR(Sample SAS Dataset for SAS to DDI3 Program), SU_Creator= %NRSTR(Larry Hoyle), SU_Publisher= %NRSTR(Institute for Policy & Social Research; University of Kansas), SU_Contributer= %NRSTR(Other study contributers would be listed here), SU_Abstract= %NRSTR(A sample dataset for moving data from SAS to DDI3 and back), SU_FundingAgencyID= %NRSTR(none), SU_FundingAgencyURN= %NRSTR(none), SU_GrantNumber= %NRSTR(none), SU_Purpose= %NRSTR(These data were generated by a SAS program to test SAS programs exporting data to DDI 3), SU_TopicalKeyword1= %NRSTR(SAS), SU_TopicalKeyword2= %NRSTR(DDI), SU_SpatialCoverageDescription= %NRSTR(The data were generated in Lawrence Kansas but do not apply to this location), SU_GeographyName1= %NRSTR(Kansas), SU_GeographyName2= %NRSTR(Lawrence), SU_StartDate= %NRSTR( ), SU_EndDate= %NRSTR( ), SU_AnalysisUnit= %NRSTR(imaginary person), SU_GeographyNote= %NRSTR(Kansas is a state in the United States. Lawrence is the city in which the University of Kansas is located.), SU_Note= %NRSTR(Columns were chosen to give a variety of data types and formats. The column avocados is essential to an esoteric pun. Not all elements that can occur multiple times do so in this sample. Title, & creator are two examples. A version that allows for those repeating elements will be important for testing a SAS xmlmap to read the data back in to SAS ) ) %ConceptualComponent(C_UniverseDescription= %NRSTR(All imaginary data about people) ) %DataCollectionStart(D_ID= %NRSTR(DataCollection_001), D_StudyNote= %NRSTR(The data are just made up - entered into a datalines statement in a SAS data step), D_GeographyNote= %NRSTR(Data were made up in Blake Hall in Lawrence Kansas), D_TimeNote1= %NRSTR(The dataset was created first), D_TimeNote2= %NRSTR(Metadata were added later), D_Sampling= %NRSTR(No sampling was involved), D_DataSource= %NRSTR(The imagination of Larry), D_StartDate= %NRSTR(2007-08-07 ), D_EndDate= %NRSTR(2007-08-15 ), D_CollectionMode= %NRSTR(A SAS program generated the data table. The metadata structure was copied from the file V3_MarkupTemplate.xml modified as needed in oXygen. The XML was then modified into SAS code.) ); %QuestionSchemeStart(QuestionScheme_ID= %NRSTR(QuestionScheme_001) ) %QuestionItem(QuestionItem_ID= %NRSTR(ID), QuestionItem_Text= %NRSTR(What is the ID for this observation?), QuestionItem_CodeScheme= ); %QuestionItem(QuestionItem_ID= %NRSTR(name), QuestionItem_Text= %NRSTR(What should the name of the respondent be?), QuestionItem_CodeScheme= ); %QuestionItem(QuestionItem_ID= %NRSTR(avocado), QuestionItem_Text= %NRSTR(How many avocados should the respondent have?), QuestionItem_CodeScheme= %NRSTR(Codes_AVOCADONUMBER) ); %QuestionItem(QuestionItem_ID= %NRSTR(sex), QuestionItem_Text= %NRSTR(What is the sex of the respondent?), QuestionItem_CodeScheme= %NRSTR(Codes_SEX) ) ; %QuestionItem(QuestionItem_ID= %NRSTR(percenttime), QuestionItem_Text= %NRSTR(What percent value should we try?), QuestionItem_CodeScheme= ); %QuestionItem(QuestionItem_ID= %NRSTR(fee), QuestionItem_Text= %NRSTR(Enter a value in Euros that will test capuring measurement units?), QuestionItem_CodeScheme= ); %QuestionItem(QuestionItem_ID= %NRSTR(DOB), QuestionItem_Text= %NRSTR(What date variable should we use - how about entering a Date of Birth?), QuestionItem_CodeScheme= ); %QuestionItem(QuestionItem_ID= %NRSTR(TOB), QuestionItem_Text= %NRSTR(What time variable should we use - (how about entering the person%'s Time of Birth?)), QuestionItem_CodeScheme= ); %QuestionItem(QuestionItem_ID= %NRSTR(entryDT), QuestionItem_Text= %NRSTR(What is the DateTime when this value is being generated?), QuestionItem_CodeScheme= ); %QuestionSchemeEnd() %DataCollectionEnd() %LogicalProductStart(L_ID= %NRSTR(LogicalProduct_001) ) /* Category, Code and Variable schemes are all generated from the SAS Dataset - no parameters */ %CategoryScheme() %CodeScheme() %VariableScheme() %LogicalProductEnd() /* NOTE: LogicalProductReference must match ID from LogicalProductStart */ %PhysicalDataProductStart(PD_ID= %NRSTR(PhysicalDataProduct_001), PD_LogicalProductReference= %NRSTR(LogicalProduct_001), PD_GrossRecordStructureID= %NRSTR(GrossRecordStructure_001) ) /* ****** DataItems generated from the SAS dataset ******* */ %DataItems() /* ****** Dataset generated from the SAS dataset ******* */ %Dataset() %PhysicalDataProductEnd() %StudyUnitEnd() %DDIInstanceEnd() %Mend SAS2DDI; %SAS2DDI(lib=work, dataset=mySASdata, XMLfile=ddi);