* have: patent level dataset with * group variable identifiers on patents (eg gvkey, MSA, state, etc) * time variable (eg month, quarter, or year of application or grant) * X vars you want to aggregate to a group-time panel cap prog drop patstat_agg prog def patstat_agg syntax varlist, Groupvar(varname) Timevar(varname) Depreciation(real) Windowsize(integer) [Idvar(varname)] /* You have a dataset with observations belonging to a group and time. Each observation has some statistics X. This will create a group-time dataset with max(X) for the group over some time window [t-W,t] avg(X) for the group over some time window [t-W,t] stock(X) for the group over some time window [t-W,t] where stock is sum(X*(1-deprec)^lag)/count(X). This is the average X but weighted towards more recent time periods. It is intuitively also a stock. This is the method of creating patent stocks in Bowen, Fresard, and Hoberg Forthcoming. Notes: 1. The resulting dataset will have all possible combinations of group-time. This means the resulting dataset will likely have excess observations. 2. Any observations with missing values for group or time will be ignored. 3. Windowsize = 5 means stats cover [t-4,t] 4. Depreciation should be *per period*. If 20% a year on annual data, use 0.20. If 20% a year on quarterly data, use 0.05. Example: // it replicates the stocking function in the paper: use if retech != . using pat_lv, clear keep if ayear > 1970 & ayear <= 2010 // keep some burn in patstat_agg retech , g(vxfirm_id) t(aqtr) d(0.05) w(20) drop if year(dofq(aqtr)) < 1980 // drop burn in period rename (aqtr retech_stock) (qtr retech_stock_fcn) merge 1:1 vxfirm qtr using startup_qtr_panel, keepusing(retech_stock) keep(3) pwcorr *stoc* // the function is more general. here, used on industry-year variables: use "http://www.stata-press.com/data/r10/abdata.dta", clear patstat_agg emp wage indoutpt , g(ind) t(year) d(0.20) w(4) i(cap) // if you don't tell it an observation in a group is denoted by cap using // the i() option, you have to have a variable called pnum use "http://www.stata-press.com/data/r10/abdata.dta", clear rename cap pnum patstat_agg emp wage indoutpt , g(ind) t(year) d(0.20) w(4) */ qui{ if "`idvar'" == "" { local idvar pnum } drop if missing(`groupvar') | missing(`timevar') // set up locals for collapse and stat commands local max_collapse local sum_collapse local count_collapse local max_range local sum_range local count_range foreach v in `varlist' { local max_collapse "`max_collapse' max_`v' = `v'" local sum_collapse "`sum_collapse' sum_`v' = `v'" local count_collapse "`count_collapse' count_`v' = `v'" local max_range "`max_range' `v'_max_roll`windowsize' = max_`v'" local sum_range "`sum_range' `v'_sum_roll`windowsize' = sum_`v'" local count_range "`count_range' `v'_count_roll`windowsize' = count_`v'" } // collapse to G-T panel, with each group-time's average and max patent stats within the period collapse (sum) `sum_collapse' (max) `max_collapse' /// (count) `count_collapse', by(`groupvar' `timevar') // the panel should have no gaps and 0s where missing values exist tsset `groupvar' `timevar' tsfill, full foreach v of varlist max_* sum_* count_* { replace `v' = 0 if `v' == . } // compute the "max" within the windows (honestly), and store sums for the average pat stat in window // yes: both of these can be done in many ways, this is kind of a "free" ride // as we code towards the rolling stock local lookback = 1-`windowsize' // ex: stats over [-3,0] for win length = 4 rangestat (max) `max_range' /// (sum) `sum_range' `count_range' /// , interval(`timevar' `lookback' 0) by(`groupvar') foreach v in `varlist' { g `v'_avg_roll = `v'_sum_roll`windowsize' / `v'_count_roll`windowsize' lab var `v'_max_roll`win' "max: q+[`lookback', 0])" lab var `v'_avg_roll`win' "avg: q+[`lookback', 0])" } // create the rolling window "stocks" (a rolling average weighted towards present) // sum over patents in window ( (1-d)^t * X ) // stock ------------------------------------------ // count of patents in that window local lookback = `windowsize' - 1 // ex Lags 0,1,2,3 for win length = 4 local perc = `depreciation'*100 sort `groupvar' `timevar' foreach v in `varlist' { g `v'_stock`win' = 0 forval lag = 0/`lookback' { by `groupvar' (`timevar'): replace `v'_stock`win' = `v'_stock`win' + L`lag'.sum_`v' * (1-`depreciation')^`lag' if _n > `lag' } replace `v'_stock`win' = `v'_stock`win' / `v'_count_roll`windowsize' replace `v'_stock`win' = 0 if `v'_count_roll`windowsize' == 0 lab var `v'_stock`win' "`v' stock: [t-`lookback',t] (`perc'% deprec) " } // output order `groupvar' `timevar' keep `groupvar' `timevar' *_max_roll* *_avg_roll* *_stock* } end