PCCM

proc sql;

       create table Secm (where=(prccm>0)) as

       select distinct a.gvkey, a.iid, intnx('month',datadate,0,'e') as datadate format=date9.,

       round(prccm, 0.01) as prccm

       from  comp.Secm(where=(lengthn(strip(iid))=2))/*USA Firm Only*/ as a

       inner join (

              select distinct gvkey from comp.funda

              where sale>. or at>. /*Firm with Sales or Total Asset only*/

       ) as b

       on a.gvkey=b.gvkey;

quit;

 

/*CRSP Monthly Security File Cleansing*/

proc sql;

       create table msf(where=(prc>0)) as

       select distinct a.permno, intnx('month',date,0,'e') as date format=date9.,

       round(abs(prc), 0.01) as prc

       from crsp.msf as a inner join crsp.msenames(where=(shrcd in (10 11 12 30 31 32))) as b

       on a.permno=b.permno;

quit;

 

/*CUSIP Matching*/

proc sql;

       create table cusip_match as

       select distinct a.gvkey, a.iid, b.permno

       from comp.security as a inner join crsp.msenames as b

       on substr(a.cusip,1,8)=substr(b.ncusip,1,8)

       inner join (select distinct permno from msf) as c

       on b.permno=c.permno

       inner join (select distinct gvkey, iid from secm) as d

       on a.gvkey=d.gvkey and a.iid=d.iid;

quit;

 

 

/*A Blind Price Matching*/

proc sql;

       create table _pccm_ as

       select distinct a.gvkey, permno, a.iid

       from Secm as a inner join msf as b

       on datadate=date and prc>0.5 and prc=prccm  /*Removing very long price matching*/

       group by b.permno, a.gvkey, a.iid

       having count(distinct datadate)> 7 ; /*Parameter 2*/

quit;

 

/*Refine matching with time dimension*/

proc sql;

       create table meta as

       select distinct a.permno, intnx('month',date,0,'e') as DATE format=date9.,

       b.gvkey, b.iid, prc, prccm, prc>0.5 and prc=prccm as sdum /*Removing very long price matching*/

       from msf as a inner join _pccm_ as b

       on a.permno=b.permno

       inner join Secm as c

       on b.gvkey=c.gvkey and b.iid=c.iid and datadate=date

       order by permno, gvkey, iid, date;

quit;

 

/*Counting backwards to fine-tuning the matched point*/

PROC EXPAND data=meta OUT=meta METHOD=NONE;

       by permno gvkey iid;

       CONVERT sdum = MS_sdum / TRANSFORM=(MOVSUM 15); /*Parameter 2*/

       id date;

RUN

 

/* Counting forwards for fine-tuning the matched point (Three steps:)*/

proc sort data=meta out=meta; by permno gvkey iid descending date; run;

 

data meta; set meta; by permno gvkey iid descending date;

       idx+1;

       if first.iid then idx=1;

run;

 

PROC EXPAND data=meta OUT=meta METHOD=NONE;

       by permno gvkey iid;

       CONVERT MS_sdum = MS_mdum / TRANSFORM=(MOVmax 15); /*Parameter 2*/

       id idx;

RUN

 

/*Keep consequent matched price and date points*/

proc sql undo_policy=none;

       create table meta as

       select distinct *, sum(sdum) as hcnt "historical matched count"

       from meta(drop=idx)

       where MS_mdum>=1 and MS_sdum>=1

       group by permno, gvkey, iid

       having max(ms_sdum)> 7 /*Parameter 2*/

       order by permno, gvkey, iid, date;

quit;

 

/*Among the remained matched ones, count the closest spelling distance*/

proc sql;

       create table mname_match as

       select distinct a.gvkey, a.iid, a.permno, spedis(upcase(compress(COMNAM,,'sp')), upcase(compress(conm,,'sp'))) as spedis

       from (select distinct gvkey, iid, permno from meta) as a

       left join crsp.msenames as b

       on a.permno=b.permno

       left join comp.company as c

       on a.gvkey=c.gvkey

       group by a.permno, a.gvkey, a.iid

       having spedis(COMNAM, conm)=max(spedis(COMNAM, conm));

quit;

 

/*Among the remained matched ones, check whether same tickers are evered shared*/

proc sql;

       create table tick_match as

       select distinct a.gvkey, a.iid, a.permno, scan(upcase(b.ticker),1) = scan(upcase(c.tic),1) as tmatched

       from (select distinct gvkey, iid, permno from meta) as a

       left join crsp.msenames(where=(lengthn(TICKER)>0)) as b

       on a.permno=b.permno

       left join comp.Security(where=(lengthn(tic)>0)) as c

       on a.gvkey=c.gvkey and a.iid=c.iid

       group by a.permno, a.gvkey, a.iid

       having (scan(upcase(b.ticker),1) = scan(upcase(c.tic),1))

              =max(scan(upcase(b.ticker),1) = scan(upcase(c.tic),1));

quit;

 

/*Matched name and ticker measures back to consequent matching results and

       construct key monthly statistics*/

proc sql;

       create table _pccm_ as

       select distinct a.*, max(hcnt) as mhcnt, max(MS_sdum) as mms_sdum, max(ms_mdum) as mms_mdum,

       count(distinct catt(a.gvkey, a.iid)) as gcnt, b.permno>. as cusip_matched, spedis,

       max(b.permno>.) as mcusip_matched, tmatched, max(tmatched) as mtmatched

       from meta as a left join cusip_match as b

       on a.permno=b.permno and a.gvkey=b.gvkey and a.iid=b.iid

       left join mname_match as c

       on a.permno=c.permno and a.gvkey=c.gvkey and a.iid=c.iid

       left join Tick_match as d

       on a.permno=d.permno and a.gvkey=d.gvkey and a.iid=d.iid

       group by a.permno, date

       order by a.permno, date, a.gvkey, a.iid;

quit;

/*Cleansing procedure:

       Among multiple matches, keep the ones with either cusip matched or ticker matched

       For the remainig ones, some random statistics are used to remove wrong matched results*/

data _pccm_; set _pccm_;

       if gcnt>1 and mcusip_matched=1 and cusip_matched^=1 then delete; /*Keep CUSIP Matched Only*/

       else if gcnt>1 and mtmatched=1 and tmatched^=1 then delete; /*Keep CUSIP Matched Only*/

       if mcusip_matched=0 and mtmatched=0 and spedis>10 and MS_mdum<=3 and hcnt<5 then delete;/*Keep the ones with similar names but matched only a few times - Value decided by observations*/

       else if mcusip_matched=0 and mtmatched=0 and spedis>30 and hcnt<15 then delete;/*Keep the ones with different names but with a lot matches - Value decided by observations*/

       drop mhcnt mms_sdum mms_mdum gcnt mcusip_matched mtmatched;

run;

 

/*To have the best one-to-one (permno to gvkey iid) match*/

proc sql;

       create table pccm as

       select distinct *

       from (

              select distinct *

              from (

                     select distinct *

                     from (

                           select distinct *

                           from _pccm_

                           group by permno, date

                           having hcnt=max(hcnt) /*1) among the current best, get the best historical best*/

                     )

                     group by permno, date

                     having MS_sdum=max(MS_sdum) /*2) at given day, get the best matches*/

              )

              group by gvkey, iid, date

              having hcnt=max(hcnt)

              )

       group by gvkey, iid, date

       having MS_sdum=max(MS_sdum) ;

quit;

 

/*Patching based on observations */

proc sql undo_policy=none;

       create table pccm as

       select distinct *

       from (

              select distinct *

              from pccm

              group by permno, gvkey, date

              having iid=min(iid)

       )

       group by permno, date

       having spedis=min(spedis)

       order by permno, date;

quit;

 

 

/*Add Time dimensions to PERMNO_to_IID through CUSIP*/

proc sql;

       create table cusip_match_valid as

       select distinct a.permno, a.date, b.gvkey, b.iid

       from msf as a inner join Cusip_match as b

       on a.permno=b.permno

       inner join (

                     select distinct gvkey, datadate from comp.funda

                     where sale>. or at>. 

              ) as c

       on b.gvkey=c.gvkey and

       intnx('month', datadate, -24, 'e')<a.date<=intnx('month', datadate, 12, 'e')

       left join pccm as d

       on a.permno=d.permno and b.gvkey=d.gvkey and b.iid=d.iid and a.date=d.date

       group by a.permno, a.date

       having (d.permno>.)=max(d.permno>.) and count(distinct catt(gvkey,iid))=1;

quit;

 

/*Combine CUSIP and PRICE match*/

proc sql;

       create table _link_table_ as

       select distinct a.permno, a.date, b.gvkey, b.iid,

       c.gvkey as cusip_gvkey, c.iid as cusip_iid

       from msf as a left join pccm as b

       on a.permno=b.permno and a.date=b.date

       left join cusip_match_valid as c

       on a.permno=c.permno and a.date=c.date

       order by permno, gvkey, iid, date;

quit;

 

/*Making a range (two steps)*/

data _link_table_(where=(lengthn(gvkey)>0)); set _link_table_;

       if gvkey="" and iid="" then do;

              gvkey=cusip_gvkey; iid=cusip_iid;

       end;

       drop cusip_gvkey cusip_iid;

proc sort; by permno gvkey iid date;

run;

 

data _link_table_; set _link_table_; by permno gvkey iid date;

       retain from;

       ldate=lag(date);

       if first.iid then do; ldate=.; from=date; end;

       else if intnx('month',ldate, 1, 'e')<intnx('month',date, 0, 'e') then

              from=date;

       format from date9.;

run;

 

/*Save the matched Table*/

proc sql;

       create table pccm_table as

       select distinct permno, gvkey, iid, from, date as to format=date9.

       from _link_table_

       group by permno, gvkey, iid, from

       having date=max(date);

quit;

 

Comments