*****************************************************************************************************************************************************************; * Last Update: June 2015 *; * *; * This SAS code reproduces our main results . *; * Note, this is NOT the code used in the actual paper. My 'old' computer died unexpectedly and I had to re-write the code. *; * I made some minor changes, which I consider improvements. So the numbers generated in this code will not exactly match the ones used in the paper. *; *****************************************************************************************************************************************************************; libname crsp 'C:\...\data\crsp'; *this folder contains three SAS datasets (dsf - daily stock returns file, dse - daily stock events file from CRSP, and dsix - daily market returns file from CRSP); libname split 'C:\...\split'; *this folder contains no SAS datasets in the beginning; ********************************************************** Part 1 - Create file with splits **********************************************************; data dse; set crsp.dse; by permno date; if DISTCD = 5523 then split=1; else split=0; *identify splits as in Baker, Greenwood, Wurgler (2009, "Catering through nominal share prices"); keep permno date DISTCD split DCLRDT RCRDDT PAYDT; run; *note to myself, there's sometimes multiple entries by date in dse, i.e. two rows or even more for the same date; *scrolling through the data, it appears that generally the first row contains some distribution code; *the second row (and higher) just contains .; data CFACPR; set crsp.dsf; by permno date; prc = abs(prc); keep permno date CFACPR prc; data CFACPR; set CFACPR; by permno date; if first.permno then n=0; n+1; lag1_prc = lag(prc); if first.permno then lag1_prc = .; abc = CFACPR; lag1_CFACPR = lag(CFACPR); if first.permno then lag1_CFACPR = .; if lag1_CFACPR > 0 then delta_CFACPR = (CFACPR-lag1_CFACPR)/lag1_CFACPR; else delta_CFACPR = .; if CFACPR > 0 then delta_CFACPR = delta_CFACPR; else delta_CFACPR = .; keep permno date prc lag1_prc abc lag1_CFACPR delta_CFACPR; data dse; merge dse CFACPR; by permno date; rename abc = CFACPR; run; data split.split; set dse; label date = 'Date (time t)'; label prc = 'Price at time t'; label lag1_prc = 'Price at time t-1 (One Trading Day Ago)'; label CFACPR = 'Cumulative Factor to Adjust Prices at time t'; label lag1_CFACPR = 'Cumulative Factor to Adjust Prices at time t-1'; label delta_CFACPR = 'Split Ratio as Inferred by Change in Cumulative Factor to Adjust Prices'; if split = 1; drop split; run; data split.index_daily; set crsp.dsix; by date; *assign index to each date; n+1; keep date n; run; data split.split_firms; set split.split; if -0.501 <= delta_CFACPR <= -0.499; *focus on 2-for-1 splits, account for rounding issues; * if PAYDT ne .; label date = 'Date Split becomes reflected in Price in CRSP (time t)'; b=round(lag1_prc); b1=(b/2)-((b-b/2)/2); b2=(b/2)+((b-b/2)/2); *b1 to b2 makes the low price range, b2 to b3 makes the high price range; b3=(b) +((b-b/2)/2); keep permno date DCLRDT PAYDT b1 b2 b3 prc lag1_prc; proc sort data=split.split_firms; by date; run; data split_firms; merge split.split_firms (in=m1) split.index_daily; by date; if m1; rename n = event_index; run; data split.split_firms; set split_firms; by event_index; n+1; beg_index = event_index-260; end_index = event_index+260; identifier=n; drop n; run; ***************************************************************** Part 2 - Create high price-range and low price-range portfolios *****************************************************************; proc sort data=crsp.dsf out=dret; by date; data dret; merge dret (in=m1) split.index_daily (in=m2); by date; if m1 and m2; proc sort data=dret; by permno date; data dret; set dret; by permno date; prc = abs(prc); me = abs(prc)*shrout/1000; keep permno date ret prc me n; data split.dret; set dret; by permno date; lag_me = lag(me); if first.permno then lag_me=.; if ret = .B then ret=.; if ret = .C then ret=.; keep permno date ret prc lag_me n; proc sort data=split.dret; by date; run; *Splits 1-2000: Time index 1-15014 -->base1; *Splits 2001-4000: Time index 14495-17133 -->base2; *Splits 4001-6000: Time index 16615-19559 -->base3; *Splits 6001-8020: Time index 19039-23534 -->base4 (There are 8071 splits. However, the time index for 20141231 is 23535, so split 8020 is the last one with enough data.); data ret1; set split.dret; if 1 <= n <= 15014; *running the code on various subsamples is a lot more efficient; apple=1; data ret2; set split.dret; if 1 <= n <= 15014; *running the code on various subsamples is a lot more efficient; rename permno = permno_split; keep permno date n ret; proc sort data=ret2; by permno_split date; run; *delete base dataset if appropriate; option nonotes; %macro index_creation; %do i = 1 %to 10; data tmp; set split.split_firms; if identifier = &i; apple=1; rename permno = permno_split; rename date = EFFDT; rename lag1_prc = pre_split_prc; keep permno date DCLRDT PAYDT lag1_prc b1 b2 b3 event_index beg_index end_index identifier apple; data tmp1; merge tmp ret1; by apple; if n < beg_index then delete; if n > end_index then delete; if b1 < prc <= b2 then low=1; else low=0; if b2 < prc <= b3 then high=1; else high=0; if lag_me ne .; if permno = permno_split then delete; *kick out the split firm from the respective high price-range and low price-range portfolios to avoid mechanical change in comovement; keep permno_split DCLRDT EFFDT PAYDT pre_split_prc event_index identifier permno low high n date ret lag_me; run; *low price-range; data tmp2; set tmp1; if low=1; run; proc means data=tmp2 noprint; var ret /weight=lag_me; by permno_split DCLRDT EFFDT PAYDT pre_split_prc event_index identifier n date; output out=tmp2 mean=low_vw; data tmp2; set tmp2; drop _TYPE_ _FREQ_; label low_ew = 'Low Price (ew)'; label low_vw = 'Low Price (vw)'; run; *high price-range; data tmp3; set tmp1; if high=1; run; proc means data=tmp3 noprint; var ret /weight=lag_me; by permno_split DCLRDT EFFDT PAYDT pre_split_prc event_index identifier n date; output out=tmp3 mean=high_vw; data tmp3; set tmp3; drop _TYPE_ _FREQ_; label high_ew = 'High Price (ew)'; label high_vw = 'High Price (vw)'; run; *split; data tmp4; merge tmp (in=m1) ret2; by permno_split; if m1; if n < beg_index then delete; if n > end_index then delete; label pre_split_prc = 'Pre-Split Price'; label ret = 'Return of Firm doing Split'; keep permno_split DCLRDT EFFDT PAYDT pre_split_prc event_index identifier n date ret; run; data tmp4; merge tmp2 tmp3 tmp4; by permno_split DCLRDT EFFDT PAYDT pre_split_prc event_index identifier n date; event_window = n - event_index; run; proc append data=tmp4 base=split_base_1; run; %end; %mend; %index_creation; data split.split_base_1; set split_base_1; run; ***************************************************************** Part 3 - Compute shifts in price-based return comovement *****************************************************************; libname crsp 'C:\...\data\crsp'; libname split 'C:\...\split'; *append; proc datasets; delete split_base; proc append data=split.split_base_1 base=split_base; proc append data=split.split_base_2 base=split_base; proc append data=split.split_base_3 base=split_base; proc append data=split.split_base_4 base=split_base; run; *merge in prices and the corresponding price ranges; data prc; set crsp.dsf; prc = abs(prc); rename permno = permno_split; keep permno date prc; proc sort data=prc; by permno_split date; proc sort data=split_base; by permno_split date; data split_base; merge split_base (in=m1) prc; by permno_split date; if m1; run; data b; set split.split_firms; rename permno = permno_split; keep permno identifier b1 b2 b3; proc sort data=b; by identifier permno_split; proc sort data=split_base; by identifier permno_split; data split_base; merge split_base (in=m1) b; by identifier permno_split; if m1; run; proc sort data=split_base; by identifier permno_split date; data split.split_base; set split_base; run; *create some descriptive stats on price path and when a stock hits the high price range; proc sort data=split.split_base out=event; by event_window; proc means data=event noprint; var prc b1 b2 b3; by event_window; output out=event mean=prc b1 b2 b3; run; libname split 'C:\...\split'; data zzz; set split.split_base; if -240 < event_window <= -20 then event = -1; if 20 <= event_window < 240 then event = 1; if event in (-1,1); proc sort data=zzz; by identifier permno_split EFFDT PAYDT event_index event; run; *************************************************************************************************************************************************************************************************; * This is new but I think it makes sense: *; * When looking at the price evolution prior to a split, we observe that companies on average only enter the high price range 6 months prior to the split. *; * Shifts in comovement are computed by comparing the beta w.r.t. the low- and high price range one year before and one year after the split. *; * If some firm spends most of its time in the year prior to the split in the low price range, then we would not necessarily expect to detect a shift in price-based return comovement *; * even if investors categorized stocks based on price (e.g., prior to the split, a firm spends 90% of its time in the low price range - after the split, the firm spends 100% of its time *; * in the low price range. Can we really expect to see a meaningful change in return comovement w.r.t. low- and high price range after the split?) *; * Here, I start computing how much a stock comoves with the high- and low price range prior to the split only after the stock enters the high price range for the very first time. *; * I require there to be a minimum of 60 observations before and after the split. *; *************************************************************************************************************************************************************************************************; data a; set zzz; if prc > b2 then cross=1; else cross=0; if cross=1; run; proc sort data=a nodupkey; by identifier permno_split EFFDT PAYDT event_index; data a; set a; rename event_window = first_cross_b2; keep identifier permno_split DCLRDT EFFDT PAYDT pre_split_prc event_index event_window; data zzz; merge zzz a; by identifier permno_split DCLRDT EFFDT PAYDT pre_split_prc event_index; if event_window > first_cross_b2; run; proc means data=zzz noprint; var ret; by identifier permno_split DCLRDT EFFDT PAYDT pre_split_prc event_index event; output out=count n=obs; data count; set count; drop _TYPE_ _FREQ_; run; proc reg data=zzz outest=parms1 noprint; model ret = low_vw high_vw; by identifier permno_split DCLRDT EFFDT PAYDT pre_split_prc event_index event; run; quit; data parms1; set parms1; keep identifier permno_split DCLRDT EFFDT PAYDT pre_split_prc event_index event low_vw high_vw; run; proc reg data=zzz outest=parms2 noprint rsquare; model ret = low_vw; by identifier permno_split DCLRDT EFFDT PAYDT pre_split_prc event_index event; run; quit; data parms2; set parms2; rename low_vw = low_vw_univariate; rename _RSQ_ = rsquared; keep identifier permno_split DCLRDT EFFDT PAYDT pre_split_prc event_index event low_vw _RSQ_; run; proc reg data=zzz outest=parms3 noprint rsquare; model ret = high_vw; by identifier permno_split DCLRDT EFFDT PAYDT pre_split_prc event_index event; run; quit; data parms3; set parms3; rename high_vw = high_vw_univariate; rename _RSQ_ = rsquared; keep identifier permno_split DCLRDT EFFDT PAYDT pre_split_prc event_index event high_vw _RSQ_; run; data parms; merge parms1 (in=m1) parms2 parms3 count; by identifier permno_split DCLRDT EFFDT PAYDT pre_split_prc event_index event; if m1; run; data before; set parms; if event=-1; rename rsquared = before_rsquared; rename low_vw_univariate = before_low_uni; rename high_vw_univariate = before_high_uni; rename low_vw = before_low; rename high_vw = before_high; rename obs = before_obs; drop event; data after; set parms; if event=1; rename rsquared = after_rsquared; rename low_vw_univariate = after_low_uni; rename high_vw_univariate = after_high_uni; rename low_vw = after_low; rename high_vw = after_high; rename obs = after_obs; drop event; run; data main; merge before (in=m1) after (in=m2); by identifier permno_split DCLRDT EFFDT PAYDT pre_split_prc event_index; if m1 and m2; diff_rsquared = after_rsquared - before_rsquared; diff_low_uni = after_low_uni - before_low_uni; diff_high_uni = after_high_uni - before_high_uni; diff_diff_uni = diff_low_uni - diff_high_uni; diff_low = after_low - before_low; diff_high = after_high - before_high; yyyymm = year(EFFDT)*100+month(EFFDT); if before_obs > 60; *require a minimum of 60 observations before the split; if after_obs > 60; *require a minimum of 60 observations after the split; if pre_split_prc > 10; * if 1926 <= year(EFFDT) <= 2004; run; proc surveymeans data=main mean t; cluster yyyymm; var diff_rsquared diff_low_uni diff_high_uni diff_diff_uni diff_low diff_high before_obs after_obs; run; *****************************************************************; * Last time I checked (June 2015), for data from 1926 to 2014, *; * these are the results I got for the bivarate regressions: *; * diff_low +0.246010 (11.22) *; * diff_high -0.109669 (-6.11) *; *****************************************************************;