欢迎光临
我们一直在努力

实例讲解ASP实现抓取网上房产信息-ASP教程,ASP应用

建站超值云服务器,限时71元/月

<%@language=”vbscript” codepage=”936″%>
<!– #include file=”conn.asp” –>

<!– #include file=”inc/function.asp” –>
<!doctype html public “-//w3c//dtd html 4.01 transitional//en” “http://www.w3.org/tr/html4/loose.dtd”>
<html>
<head>
<title>untitled document</title>
<meta http-equiv=”content-type” content=”text/html; charset=gb2312″>
<meta http-equiv=”refresh” content=”300;url=steal_house.asp”>
</head>

<body>
<%
on error resume next

server.scripttimeout = 999999
========================================================
字符编码函数
====================================================
function bytestobstr(body,code)
        dim objstream
        set objstream = server.createobject(“adodb.stream”)
        objstream.type = 1
        objstream.mode =3
        objstream.open
        objstream.write body
        objstream.position = 0
        objstream.type = 2
        objstream.charset =code
        bytestobstr = objstream.readtext 
        objstream.close
        set objstream = nothing
end function

取行字符串在另一字符串中的出现位置
function newstring(wstr,strng)
        newstring=instr(lcase(wstr),lcase(strng))
        if newstring<=0 then newstring=len(wstr)
end function
替换字符串函数
function replacestr(ori,str1,str2)
replacestr=replace(ori,str1,str2)
end function
====================================================
function readxml(url,code,start,ends)
set osend=createobject(“microsoft.xmlhttp”)
sourcecode = osend.open (“get”,url,false)
osend.send()
readxml=bytestobstr(osend.responsebody,code )
start=instr(readxml,start)
readxml=mid(readxml,start)
ends=instr(readxml,ends)
readxml=left(readxml,ends-1)
end function

function substr(body,start,ends)
start=instr(body,start)
substr=mid(body,start+len(start)+1)
ends=instr(substr,ends)
substr=left(substr,ends-1)
end function

dim getcont,newscontent
dim url,title
url=”http://www.***.com”新闻网址knowsky.com
getcont=readxml(url,”gb2312″,”<table class=k2 border=””0″””,”</table>”)
getcont=regexhtml(getcont)
dim keyid,newsclass,city,position,housetype,level,area,price,demostra

dim contactman,contact
for i=2 to ubound(getcont)
 response.write(getcont(i)&”__<br>”)
 
 templink=mid(getcont(i),instr(getcont(i),”href=”””)+6,instr(getcont(i),””” onclick”)-10)
 templink=replace(templink,”../”,””)
 
 response.write(i&”:”&templink&”<br>”)
 newscontent=readxml(templink,”gb2312″,”<td valign=””bottom”” width=””400″”>”,”<hr width=””760″” noshade size=””1″” color=””#808080″”> “)
 newscontent=removehtml(newscontent)
 newscontent=replace(newscontent,vbcrlf,””)
 newscontent=replace(newscontent,vbnewline,””)
 newscontent=replace(newscontent,” “,””)
 newscontent=replace(newscontent,” “,””)
 newscontent=replace(newscontent,”&nbsp;”,””)
 newscontent=replace(newscontent,”\n”,””)
 newscontent=replace(newscontent,chr(10),””)
 newscontent=replace(newscontent,chr(13),””)
 ===============get content=======================
 response.write(newscontent)
 keyid=substr(newscontent,”列号:”,”信息类别:”)
 newsclass=substr(newscontent,”类别:”,”所在城市:”)
 city=substr(newscontent,”城市:”,”房屋具体位置:”)
 position=substr(newscontent,”位置:”,”房屋类型:”)
 housetype=substr(newscontent,”类型:”,”楼层:”)
 level=substr(newscontent,”楼层:”,”使用面积:”)
 area=substr(newscontent,”面积:”,”房价:”)
 price=substr(newscontent,”房价:”,”其他说明:”)
 demostra=substr(newscontent,”说明:”,”联系人:”)
 contactman=substr(newscontent,”联系人:”,”联系方式:”)
 contact=substr(newscontent,”联系方式:”,”信息来源:”)
 response.write(“总序列号:”&keyid&”<br>”)
 response.write(“信息类别:”&newsclass&”<br>”)
 response.write(“所在城市:”&city&”<br>”)
 response.write(“房屋具体位置:”&position&”<br>”)
 response.write(“房屋类型:”&housetype&”<br>”)
 response.write(“楼层:”&level&”<br>”)
 response.write(“使用面积:”&area&”<br>”)
 response.write(“房价:”&price&”<br>”)
 response.write(“其他说明:”&demostra&”<br>”)
 response.write(“联系人:”&contactman&”<br>”)
 response.write(“联系方式:”&contact&”<br>”)
 title=removehtml(aa(i))
 response.write(“title:”&title)
 for n=0 to application.contents.count
   if(application.contents(n)=keyid) then
    ifexit=true    
   end if  
 next 
 if not ifexit then
   application(time&i)=keyid
 添加到数据库
 ====================================================
 set rs=server.createobject(“adodb.recordset”)
 rs.open “select top 1 * from news order by id desc”,conn,3,3
 rs.addnew
 rs(“newsclass”)=newsclass
 rs(“city”)=city
 rs(“position”)=position
 rs(“housetype”)=housetype
 rs(“level”)=level
 rs(“area”)=area
 rs(“price”)=price
 rs(“demostra”)=demostra
 rs(“contactman”)=contactman
 rs(“contact”)=contact
 rs.update
 rs.close
 set rs=nothing
 end if
 ==================================================
 
next
function removetag(body)

 set regex = new regexp
 regex.pattern = “<[a].*?<\/[a]>”
 regex.ignorecase = true
 regex.global = true
 set matches = regex.execute(body)
 dim i,arr(15),ifexit
 i=0
 j=0
 for each match in matches
  tempstr = match.value 
  tempstr=replace(tempstr,”<td>”,””)
  tempstr=replace(tempstr,”</td>”,””)
  tempstr=replace(tempstr,”<tr>”,””)
  tempstr=replace(tempstr,”</tr>”,””) 
  arr(i)=tempstr 
  i=i+1
  if(i>=15) then
   exit for
  end if
 next
 set regex=nothing
 set matches =nothing
 removetag=arr
 
end function
function regexhtml(body)
 dim r_arr(47),r_temp
 set regex2 = new regexp
 regex2.pattern =”<a.*?<\/a>”
 regex2.ignorecase = true
 regex2.global = true
 set matches2 = regex2.execute(body)
 iii=0
 for each match in matches2
 
  r_arr(iii)=match.value
 
  iii=iii+1 
 next
 regexhtml=r_arr
 set regex2=nothing
 set matches2=nothing
end function
======================================================

conn.close
set conn=nothing
%>
</body>
</html>

 

  function.asp

 <%
**************************************************
函数名:gottopic
作  用:截字符串,汉字一个算两个字符,英文算一个字符
参  数:str   —-原字符串
       strlen —-截取长度
返回值:截取后的字符串
**************************************************
function gottopic(str,strlen)
 if str=”” then
  gottopic=””
  exit function
 end if
 dim l,t,c, i
 str=replace(replace(replace(replace(str,”&nbsp;”,” “),”&quot;”,chr(34)),”&gt;”,”>”),”&lt;”,”<“)
 str=replace(str,”?”,””)
 l=len(str)
 t=0
 for i=1 to l
  c=abs(asc(mid(str,i,1)))
  if c>255 then
   t=t+2
  else
   t=t+1
  end if
  if t>=strlen then
   gottopic=left(str,i) & “…”
   exit for
  else
   gottopic=str
  end if
 next
 gottopic=replace(replace(replace(replace(gottopic,” “,”&nbsp;”),chr(34),”&quot;”),”>”,”&gt;”),”<“,”&lt;”)
end function
=========================================================
函数:removehtml(strhtml)
功能:去除html标记
参数:strhtml  –要去除html标记的字符串
=========================================================
function removehtml(strhtml)
dim objregexp, match, matches
set objregexp = new regexp

objregexp.ignorecase = true
objregexp.global = true
取闭合的<>
objregexp.pattern = “<.+?>”
进行匹配
set matches = objregexp.execute(strhtml)

遍历匹配集合,并替换掉匹配的项目
for each match in matches
strhtml=replace(strhtml,match.value,””)
next
removehtml=strhtml
set objregexp = nothing
set matches=nothing
end function

%>
 

  conn.asp

 <%
on error resume next
set conn=server.createobject(“adodb.connection”)
con= “driver={microsoft access driver (*.mdb)};dbq=” & server.mappath(“stest.mdb”)
conn.open con

sub connclose
   conn.close
   set conn=nothing  
end sub
%>
 

赞(0)
版权申明:本站文章部分自网络,如有侵权,请联系:west999com@outlook.com 特别注意:本站所有转载文章言论不代表本站观点! 本站所提供的图片等素材,版权归原作者所有,如需使用,请与原作者联系。未经允许不得转载:IDC资讯中心 » 实例讲解ASP实现抓取网上房产信息-ASP教程,ASP应用
分享到: 更多 (0)