|
<%@language=”vbscript” codepage=”936″%> <!– #include file=”conn.asp” –>
<!– #include file=”inc/function.asp” –> <!doctype html public “-//w3c//dtd html 4.01 transitional//en” “http://www.w3.org/tr/html4/loose.dtd”> <html> <head> <title>untitled document</title> <meta http-equiv=”content-type” content=”text/html; charset=gb2312″> <meta http-equiv=”refresh” content=”300;url=steal_house.asp”> </head>
<body> <% on error resume next
server.scripttimeout = 999999 ======================================================== 字符编码函数 ==================================================== function bytestobstr(body,code) dim objstream set objstream = server.createobject(“adodb.stream”) objstream.type = 1 objstream.mode =3 objstream.open objstream.write body objstream.position = 0 objstream.type = 2 objstream.charset =code bytestobstr = objstream.readtext objstream.close set objstream = nothing end function
取行字符串在另一字符串中的出现位置 function newstring(wstr,strng) newstring=instr(lcase(wstr),lcase(strng)) if newstring<=0 then newstring=len(wstr) end function 替换字符串函数 function replacestr(ori,str1,str2) replacestr=replace(ori,str1,str2) end function ==================================================== function readxml(url,code,start,ends) set osend=createobject(“microsoft.xmlhttp”) sourcecode = osend.open (“get”,url,false) osend.send() readxml=bytestobstr(osend.responsebody,code ) start=instr(readxml,start) readxml=mid(readxml,start) ends=instr(readxml,ends) readxml=left(readxml,ends-1) end function
function substr(body,start,ends) start=instr(body,start) substr=mid(body,start+len(start)+1) ends=instr(substr,ends) substr=left(substr,ends-1) end function
dim getcont,newscontent dim url,title url=”http://www.***.com”新闻网址 getcont=readxml(url,”gb2312″,”<table class=k2 border=””0″””,”</table>”) getcont=regexhtml(getcont) dim keyid,newsclass,city,position,housetype,level,area,price,demostra
dim contactman,contact for i=2 to ubound(getcont) response.write(getcont(i)&”__<br>”) templink=mid(getcont(i),instr(getcont(i),”href=”””)+6,instr(getcont(i),””” onclick”)-10) templink=replace(templink,”../”,””) response.write(i&”:”&templink&”<br>”) newscontent=readxml(templink,”gb2312″,”<td valign=””bottom”” width=””400″”>”,”<hr width=””760″” noshade size=””1″” color=””#808080″”> “) newscontent=removehtml(newscontent) newscontent=replace(newscontent,vbcrlf,””) newscontent=replace(newscontent,vbnewline,””) newscontent=replace(newscontent,” “,””) newscontent=replace(newscontent,” “,””) newscontent=replace(newscontent,” ”,””) newscontent=replace(newscontent,”\n”,””) newscontent=replace(newscontent,chr(10),””) newscontent=replace(newscontent,chr(13),””) ===============get content======================= response.write(newscontent) keyid=substr(newscontent,”列号:”,”信息类别:”) newsclass=substr(newscontent,”类别:”,”所在城市:”) city=substr(newscontent,”城市:”,”房屋具体位置:”) position=substr(newscontent,”位置:”,”房屋类型:”) housetype=substr(newscontent,”类型:”,”楼层:”) level=substr(newscontent,”楼层:”,”使用面积:”) area=substr(newscontent,”面积:”,”房价:”) price=substr(newscontent,”房价:”,”其他说明:”) demostra=substr(newscontent,”说明:”,”联系人:”) contactman=substr(newscontent,”联系人:”,”联系方式:”) contact=substr(newscontent,”联系方式:”,”信息来源:”) response.write(“总序列号:”&keyid&”<br>”) response.write(“信息类别:”&newsclass&”<br>”) response.write(“所在城市:”&city&”<br>”) response.write(“房屋具体位置:”&position&”<br>”) response.write(“房屋类型:”&housetype&”<br>”) response.write(“楼层:”&level&”<br>”) response.write(“使用面积:”&area&”<br>”) response.write(“房价:”&price&”<br>”) response.write(“其他说明:”&demostra&”<br>”) response.write(“联系人:”&contactman&”<br>”) response.write(“联系方式:”&contact&”<br>”) title=removehtml(aa(i)) response.write(“title:”&title) for n=0 to application.contents.count if(application.contents(n)=keyid) then ifexit=true end if next if not ifexit then application(time&i)=keyid 添加到数据库 ==================================================== set rs=server.createobject(“adodb.recordset”) rs.open “select top 1 * from news order by id desc”,conn,3,3 rs.addnew rs(“newsclass”)=newsclass rs(“city”)=city rs(“position”)=position rs(“housetype”)=housetype rs(“level”)=level rs(“area”)=area rs(“price”)=price rs(“demostra”)=demostra rs(“contactman”)=contactman rs(“contact”)=contact rs.update rs.close set rs=nothing end if ================================================== next function removetag(body)
set regex = new regexp regex.pattern = “<[a].*?<\/[a]>” regex.ignorecase = true regex.global = true set matches = regex.execute(body) dim i,arr(15),ifexit i=0 j=0 for each match in matches tempstr = match.value tempstr=replace(tempstr,”<td>”,””) tempstr=replace(tempstr,”</td>”,””) tempstr=replace(tempstr,”<tr>”,””) tempstr=replace(tempstr,”</tr>”,””) arr(i)=tempstr i=i+1 if(i>=15) then exit for end if next set regex=nothing set matches =nothing removetag=arr end function function regexhtml(body) dim r_arr(47),r_temp set regex2 = new regexp regex2.pattern =”<a.*?<\/a>” regex2.ignorecase = true regex2.global = true set matches2 = regex2.execute(body) iii=0 for each match in matches2 r_arr(iii)=match.value iii=iii+1 next regexhtml=r_arr set regex2=nothing set matches2=nothing end function ======================================================
conn.close set conn=nothing %> </body> </html>
|