SAS Macros for Merging Sibling Map and Data Files.

A SAS program and macros code printed below may be used to make a wide data file. The program has two sections: a) user input section and b) execution section. The program requires input to three macro variables and the user must provide valid values to these macro variables in the user input section (see SAS program below): Input the value to the right of "=" sign on a macro definition row and leave ";" to mark the end of the definition. Use SAS help on how to create a macro variable.

Following macro variables are used in the program:

At the end of the run, users must check SAS log for errors and take appropriate action to fix it.

From this process, two output files --"long" and "wide"-- named as "map filename" + "_long" and "map filename" + "_wide" are copied to the input data files folder.

Researchers can also subset rows from the map file as to fit the type individuals and siblings they want to include into the final analysis. Such sub setting can reduce the number of columns and rows from the wide file. It's recommended to avoid carrying extra variables in the wide file.

SAS Program:

--- copy SAS code below this line to the end ---


/**	===USER INPUT MACRO VARIABLES SECTION BEGINS===	**/
/**	---Enter values for macro variables below---	**/

%let path= ; 		/*	Data files folder */
%let mapfile= ;		/*	Map filename 	  */
%let datafile= ;	/*	Data filename 	  */

/**	===USER INPUT MACRO VARIABLES SECTION ENDS=== 	**/

/**	===DO NOT EDIT BELOW THIS LINE=== 	**/
/**	===     EXECUTION SECTION     === 	**/

LIBNAME TMP "&path" ;
* options mprint ;

**	copy files to work library ** ;
DATA &datafile ;
	SET TMP.&datafile ;
RUN ;

**	check for ID68 and PN as first and second variables ** ;
%let ikey = 0 ;
proc sql noprint ;
select trim(varname) into :ikey separated by '-'
from (
select varnum, upcase(name) as varname
from dictionary.columns
where libname = 'WORK' and upcase(memname) = %upcase("&datafile")
and varnum <= 2
) order by varnum ;
quit ;
%put &ikey ;
%macro ikey ;
	%if "&ikey" ne "ID68-PN" %then %do ;
		%PUT ERROR: ID68 and PN are not the first and second variables ;
		%PUT ERROR- on the input data set (%UPCASE(&datafile)). ;
	%end ;
%mend ikey;
%ikey ;

**	check for duplicate key values ** ;
%let idup = 0 ;
proc sql noprint ;
select trim(left(put(count(*), best.))) into :idup
from (
select id68, pn
from &datafile
group by id68, pn
having count(*) > 1 )
;
quit ;

%macro idup ;
	%if &idup > 0 %then %do ;
		%PUT ERROR: The input data set (%UPCASE(&datafile)) has duplicate rows. ;
	%end ;
%mend idup;
%idup ;

DATA &mapfile ;
	FORMAT MAPID F6. LINKID Z8.3 ;
	SET TMP.&mapfile ;
	MAPID = _N_ ;
	LINKID = ID68+PN/1000 ;
RUN ;


proc sql ;
create table var as
select varnum, upcase(name) as varname label=' '
from dictionary.columns
where libname='WORK' and memname=%upcase("&datafile")
order by varnum
;
quit ;

data _null_ ;
	set var ;
	call symput('var'||trim(left(_n_)), trim(left(varname)) ) ;
	call symput('nvar', trim(left(_n_)) ) ;
run ;
%put &nvar ;


proc sql ;
create table mvar as
select varnum
	, upcase(name) as varname label=' '
	, format as vFormat label=' '
	, case substr(name, 1, 4)
		when 'ID68' then substr(name, 5)
		else ' ' end as psfix label=' '
from dictionary.columns
where libname='WORK' and memname=%upcase("&mapfile")
order by varnum
;
quit ;


data _null_ ;
	set mvar ;
	call symput('mvar'||trim(left(_n_)), trim(left(varname)) ) ;
	call symput('mFmt'||trim(left(_n_)), trim(left(vFormat)) ) ;
	call symput('nmvar', trim(left(_n_)) ) ;
	if substr(varname, 1, 4) = 'ID68' then do ;
		if psfix = ' ' then psfix = 'I' ;
		n + 1 ;
		call symput('psfix'||trim(left(n)), trim(left(psfix)) ) ;
		call symput('npsfix', trim(left(n)) ) ;
	end ;
run ;
%put # of map vars = &nmvar ;
%put Map prefix types = &npsfix ;

**	long file ;
%macro iad ;
%do i=1 %to &npsfix ;
	proc sql ;
		create table &&psfix&i as
		select
		  a.MAPID
		, a.LINKID

		%if &i = 1 %then %do ;
	  		, a.ID68
			, a.PN
			, a.sex
			%do j=3 %to &nvar ;
				, b.&&var&j
			%end ;
		%end ;
		%else %if &i > 1 %then %do ;
			, a.sibNum
			, a.ID68&&psfix&i
			, a.PN&&psfix&i
			, a.SibSex
			, a.sibType
			%do j=3 %to &nvar ;
				, b.&&var&j as &&var&j.._&&psfix&i
			%end ;
		%end ;

		from &mapfile a inner join &datafile b
		%if &i = 1 %then %do ;
			on (a.ID68 = b.ID68 and a.PN = b.PN)
		%end ;
		%if &i > 1 %then %do ;
			on (a.ID68&&psfix&i = b.ID68 and a.PN&&psfix&i = b.PN)
		%end ;
		order by a.mapid
		;
	quit ;
%end ;

data &mapfile._long (drop = mapid linkid) ;
	format
	%do k=1 %to %eval(&nmvar-2) ;
	&&mvar&k &&mFmt&k
	%end ; ;
	merge
	%do i=1 %to &npsfix ;
	&&psfix&i (in=&&psfix&i)
	%end ; ;
		by mapid ;
	if I ;
run ;

%mend iad ;
%iad ;


data s2 (DROP=MAPID) ;
	set s (rename=(ID68S=ID68_S PNS=PN_S SIBSEX=SEX_S SIBTYPE=TYPE_S)
	) ;
run ;


proc sql noprint ;
create table sibnum as
select distinct sibnum
from s2
order by sibnum
;
quit ;
data _null_ ;
	set sibnum ;
	call symput('sib'||trim(left(_n_)), put(sibnum, z2.) ) ;
	call symput('nsibs', trim(left(_n_)) ) ;
run ;
%put &nsibs &sib1;


proc sql ;
create table svar as
select
  varnum
, upcase(name) as varname label=' '
, label as vlabel label=' '
, format as vFormat label=' '
from dictionary.columns
where libname='WORK' and memname="S2"
and varnum >= 3
order by varnum
;
quit ;
data _null_ ;
	set svar ;
	call symput('svar'||trim(left(_n_)), trim(left(varname)) ) ;
	call symput('nsvar', trim(left(_n_)) ) ;
run ;
%put &nsvar &svar1;


%macro iad2 ;
data
	%do i=1 %to &nsibs ;
		snum_&&sib&i (rename = (
		%do j=1 %to &nsvar ;
			&&svar&j = &&svar&j..&&sib&i
		%end ;
		))
	%end ;
	;
	set s2 ;
	%do i=1 %to &nsibs ;
		%if &i > 1 %then %do ;
			else
		%end ;
		if sibnum = &i then output snum_&&sib&i ;
	%end ;
run ;

%do i=1 %to &nsibs ;
	proc sort data = snum_&&sib&i nodupkey ;
		by linkid ;
	run ;
%end ;
%mend iad2 ;
%iad2 ;


proc sort data = i nodupkey out = i2 ;
	by linkid ;
run ;
%macro iadw ;
data &mapfile._wide (drop=linkid) ;
	merge i2 (drop=mapid)
	%do i=1 %to &nsibs;
		snum_&&sib&i (drop=sibnum)
	%end ; ;
		by linkid ;
run ;
%mend iadw ;
%iadw ;


**	# of lines per individual type ;
proc sql ;
create table wtype as
select
  memname as d2_ format=$10. label=' '
, nvar-2 as nvar label=' '
, nobs label=' '
from dictionary.tables
where libname = 'WORK'
and (memname = 'I2' or memname like 'SNUM%')
order by memname
;
quit ;


**	Save/copy output data files ;
proc datasets lib=work nolist ;
	save &mapfile._long &mapfile._wide ;
	copy in=work out=tmp ;
		select &mapfile._long &mapfile._wide;
quit ;