CAI Yishu

Ph.D. Candidate | CityU of HK

Let every man be his own methodologist, let every man be his own theorist. ― C. Wright Mills


/*
	Stata Code
	For scholarly communication only
*/

* Session setup: start from an empty dataset, turn off output paging, and
* work inside the project directory (replace "Your Dir" with a real path).
clear
set more off
* capture: mkdir errors out when the directory already exists
cap mkdir "Your Dir"   
cd "Your Dir"

* sxpose, used by the scraping steps to transpose string variables, is a
* community-contributed command from SSC and not part of official Stata.
* Install it when it is missing so the script does not stop half-way with
* an unrecognized-command error.
capture which sxpose
if _rc ssc install sxpose

*1.1 Single Page
* Download page 1 of the 2016-12-31 listing, convert the file from GB18030
* to UTF-8, and parse every record into string variables.
local file = "1.txt"
copy "http://stockdata.stock.hexun.com/zrbg/data/zrbList.aspx?date=2016-12-31&count=20&pname=20&titType=null&page=1" "`file'",replace
unicode analyze "`file'"
unicode encoding set gb18030
unicode translate "`file'", transutf8
* Remove the backup made by unicode translate so later files can be translated
unicode erasebackups, badidea

* Read the whole response into one observation
set obs 1
gen v =fileread("`file'")

* Cut the response at each record marker and transpose the pieces so that
* every piece becomes one row of _var1.
split v,p(`",{Number:"')
sxpose,clear
* Drop the first piece, i.e. the text in front of the first marker.
* NOTE(review): confirm that this piece never holds a data record.
drop in 1
format _var1 %100s

* Identifier fields
gen stkcd = ustrregexs(1) if ustrregexm( _var1 ,`"aspx\?code=([0-9]+)&date"')
gen year = ustrregexs(1) if ustrregexm(_var1,`"&date=([0-9]+-[0-9]+-[0-9]+)',industry"')
gen stdname = ustrregexs(1) if ustrregexm(_var1,`"industry:'(.+\([0-9]+\))',stockNumber"')

* Score fields. The numeric pattern accepts an optional leading minus sign
* and an optional decimal part; the earlier pattern required unsigned
* digits.digits and silently left any other value empty.
* NOTE(review): negative scores appear to occur in this source - confirm.
gen stockNumber = ustrregexs(1) if ustrregexm(_var1,`"stockNumber:'(-?[0-9]+\.?[0-9]*)',"')
gen industryrate = ustrregexs(1) if ustrregexm(_var1,`"industryrate:'(-?[0-9]+\.?[0-9]*)',"')
gen Pricelimit = ustrregexs(1) if ustrregexm(_var1,`"Pricelimit:'([A-Z])',"')
gen lootingchips = ustrregexs(1) if ustrregexm(_var1,`"lootingchips:'(-?[0-9]+\.?[0-9]*)',"')
gen Scramble = ustrregexs(1) if ustrregexm(_var1,`"Scramble:'(-?[0-9]+\.?[0-9]*)',"')
gen rscramble = ustrregexs(1) if ustrregexm(_var1,`"rscramble:'(-?[0-9]+\.?[0-9]*)',"')
gen Strongstock = ustrregexs(1) if ustrregexm(_var1,`"Strongstock:'(-?[0-9]+\.?[0-9]*)',"')
drop _var1


*1.2 Multiple Pages
* For each report date 2010-12-31 to 2018-12-31, download listing pages
* 1 to 190, parse each page as in the single-page step, and save it as
* its own dataset named by year followed by page number.
* The scratch file name does not change between iterations.
local file = "1.txt"
forvalues j = 2010(1)2018{
	forvalues i = 1(1)190{

		clear all
		copy "http://stockdata.stock.hexun.com/zrbg/data/zrbList.aspx?date=`j'-12-31&count=20&pname=20&titType=null&page=`i'" "`file'",replace
		unicode analyze "`file'"
		unicode encoding set gb18030
		unicode translate "`file'", transutf8
		* Remove the translate backup so the next page can be translated
		unicode erasebackups, badidea

		* Two observations are created and the first is dropped, which
		* leaves a single row holding the whole response.
		set obs 2
		gen v =fileread("`file'")
		drop in 1

		* One row of _var1 per piece of the response; the first piece is
		* the text in front of the first record marker and is dropped.
		* NOTE(review): confirm that this piece never holds a data record.
		split v,p(`",{Number:"')
		sxpose,clear
		drop in 1
		format _var1 %100s

		* Identifier fields
		gen stkcd = ustrregexs(1) if ustrregexm( _var1 ,`"aspx\?code=([0-9]+)&date"')
		gen year = ustrregexs(1) if ustrregexm(_var1,`"&date=([0-9]+-[0-9]+-[0-9]+)',industry"')
		gen stdname = ustrregexs(1) if ustrregexm(_var1,`"industry:'(.+\([0-9]+\))',stockNumber"')

		* Score fields. The numeric pattern accepts an optional leading
		* minus sign and an optional decimal part; the earlier pattern
		* required unsigned digits.digits and left other values empty.
		* NOTE(review): negative scores appear to occur here - confirm.
		gen  stockNumber = ustrregexs(1) if ustrregexm(_var1,`"stockNumber:'(-?[0-9]+\.?[0-9]*)',"')
		gen  industryrate = ustrregexs(1) if ustrregexm(_var1,`"industryrate:'(-?[0-9]+\.?[0-9]*)',"')
		gen  Pricelimit = ustrregexs(1) if ustrregexm(_var1,`"Pricelimit:'([A-Z])',"')
		gen  lootingchips = ustrregexs(1) if ustrregexm(_var1,`"lootingchips:'(-?[0-9]+\.?[0-9]*)',"')
		gen  Scramble = ustrregexs(1) if ustrregexm(_var1,`"Scramble:'(-?[0-9]+\.?[0-9]*)',"')
		gen  rscramble = ustrregexs(1) if ustrregexm(_var1,`"rscramble:'(-?[0-9]+\.?[0-9]*)',"')
		gen  Strongstock = ustrregexs(1) if ustrregexm(_var1,`"Strongstock:'(-?[0-9]+\.?[0-9]*)',"')
		drop _var1

		save `j'`i',replace
		* Pause one second between requests
		sleep 1000
	}
}


*2. Combining
* Append every per-page dataset in the working directory into one dataset,
* drop rows without a stock code, and save the combined result.
clear
cd "Your Dir"
local files: dir "Your Dir" files "*.dta"  
* Compound quotes keep the quoted file names readable when displayed
dis `"`files'"'
foreach file in `files'{
	* The combined output of an earlier run also matches the pattern;
	* skip it so its rows are not appended a second time.
	if `"`file'"' == "和讯网企业社会责任.dta" continue
	append using "`file'"
}

sort stkcd year
drop if stkcd==""
save "和讯网企业社会责任.dta",replace

* Attach the descriptive labels while the variables still carry the field
* names taken from the source; labels stay with the variables when renamed.
label var stkcd        "股票代码"
label var year         "截止日期"
label var stdname      "股票名称"
label var stockNumber  "股东责任"
label var industryrate "总得分"
label var Pricelimit   "等级"
label var lootingchips "员工责任"
label var Scramble     "供应商、客户和消费者权益责任"
label var rscramble    "环境责任"
label var Strongstock  "社会责任"

* Switch to the short analysis names in a single group rename
rename (stockNumber industryrate Pricelimit lootingchips Scramble rscramble Strongstock) ///
       (sr          TS           rank       er           sccr     enr       sor)

* Final column layout and row order
order stkcd year stdname TS rank sr er sccr enr sor
sort stkcd year

*3. Exporting
* Write the labelled dataset back to disk as a Stata file, then as an
* Excel workbook whose header row shows the variable labels.
save "和讯网企业社会责任", replace
export excel using "和讯网企业社会责任.xlsx", replace firstrow(varlabels)