2017-10-16 56 views
1

我想从网页读取源代码并从中提取一些数据。 我在这里用我的例子中的一个RegEx来提取数据,但我没有得到 的任何数据,也许这是由于unicode或模式dosn't匹配? 当我测试这个模式与RegExBuddy它匹配,但在VBScript不? 也许,我错过了代码中的某些东西,或者我必须用另一种方式重新编写代码?从网页读取源代码并从中提取一些数据

这里是我的尝试:

Option Explicit 
Dim URL,fso,ws,LogFile,sSrcUrl,oHTTP,bGetAsAsync,Data 
Set fso = CreateObject("Scripting.FileSystemObject") 
Set ws = CreateObject("Wscript.Shell") 
LogFile = Left(Wscript.ScriptFullName,InstrRev(Wscript.ScriptFullName, ".")) & "txt" 
if fso.FileExists(LogFile) Then 
    fso.DeleteFile LogFile 
end If 

sSrcUrl = "https://fr.giveawayoftheday.com/" 
Set oHTTP = CreateObject("MSXML2.ServerXMLHTTP.6.0") 
bGetAsAsync = False 
oHTTP.open "GET", sSrcUrl, bGetAsAsync 
oHTTP.send 
If oHTTP.status <> 200 Then 
WScript.Echo "unexpected status = " & oHTTP.status & vbCrLf & oHTTP.statusText 
WScript.Quit 
End If 
Data = oHTTP.responseText 
WriteLog Data,LogFile 
wscript.echo Extract(Data) 
'**************************************************************** 
Function Extract(Data) 
    Dim oRE,oMatches,Match,Line 
    set oRE = New RegExp 
    oRE.IgnoreCase = True 
    oRE.Global = True 
    oRE.MultiLine = True 
    oRE.Pattern = "<div class=""giveaway_wrap cf"">(\r.*\n.*){17}</div>" 
    set oMatches = oRE.Execute(Data) 
    If not isEmpty(oMatches) then 
     For Each Match in oMatches 
      Line = Match.Value 
      Extract = Line 
     Next 
    End if 
End Function 
'***************************************************************** 
Sub WriteLog(strText,LogFile) 
    Dim fs,ts 
    Const ForWriting = 2 
    Set fs = CreateObject("Scripting.FileSystemObject") 
    Set ts = fs.OpenTextFile(LogFile,ForWriting,True,-1) 
    ts.WriteLine strText 
    ts.Close 
End Sub 
'***************************************************************** 

所以,我希望得到的结果是:

<div class="giveaway_wrap cf"> 
       <div class="giveaway_img"> 
        <img src="https://giveawayoftheday.com/wp-content/uploads/2017/10/82810932353ab590bf475ea3980f3038.png" alt="Excel Url Validator 1.0 Giveaway" /> 
        <div class="giveaway_label"> 
         <a href="https://fr.giveawayoftheday.com/excel-url-validator-1-0/" class="label_link"></a> 
         <div class="old_price">$40.00</div> 
         <div class="free"> 
          <span class="big">GRATUIT</span> aujourd’hui 
         </div> 
        </div> 
       </div> 
       <div class="over"> 
        <div class="giveaway_title"> 
         <a href="https://fr.giveawayoftheday.com/excel-url-validator-1-0/">Excel Url Validator 1.0</a> 
         <div class="giveaway_date">16 octobre 2017</div> 
        </div> 
        <div class="giveaway_descr">Excel Url Validator trouve des liens rompus dans les feuilles de calcul Excel.</div> 
       </div> 
+2

的赠品一个HTA文件还有另一个(更好)的方式:使用HTML解析器。单独使用正则表达式很快就会失败。 –

+0

@TimBiegeleisen你能告诉我怎么做?谢谢 ! – Hackoo

+0

我不太清楚VB对你有用。但我赞成你。 –

回答

0

我得到了这样的解决方案:

Option Explicit 
Dim URL,fso,ws,LogFile,sSrcUrl,oHTTP,bGetAsAsync,Data 
Set fso = CreateObject("Scripting.FileSystemObject") 
Set ws = CreateObject("Wscript.Shell") 
LogFile = Left(Wscript.ScriptFullName,InstrRev(Wscript.ScriptFullName, ".")) & "htm" 
if fso.FileExists(LogFile) Then 
    fso.DeleteFile LogFile 
end If 

sSrcUrl = "https://fr.giveawayoftheday.com/" 
Set oHTTP = CreateObject("MSXML2.ServerXMLHTTP.6.0") 
bGetAsAsync = False 
oHTTP.open "GET", sSrcUrl, bGetAsAsync 
oHTTP.send 
If oHTTP.status <> 200 Then 
WScript.Echo "unexpected status = " & oHTTP.status & vbCrLf & oHTTP.statusText 
WScript.Quit 
End If 
Data = oHTTP.responseText 
WriteLog Extract(Data),LogFile 
wscript.echo Extract(Data) 
'**************************************************************** 
Function Extract(Data) 
    Dim oRE,oMatches,Match,Line 
    set oRE = New RegExp 
    oRE.IgnoreCase = True 
    oRE.Global = True 
    oRE.MultiLine = True 
    oRE.Pattern = "<div class=""giveaway_wrap cf"">(?:(?!""giveaway_counter first"">)[\s\S])*</div>" 
    set oMatches = oRE.Execute(Data) 
    If not isEmpty(oMatches) then 
     For Each Match in oMatches 
      Line = Match.Value 
      Extract = Line 
     Next 
    End if 
End Function 
'***************************************************************** 
Sub WriteLog(strText,LogFile) 
    Dim fs,ts 
    Const ForWriting = 2 
    Set fs = CreateObject("Scripting.FileSystemObject") 
    Set ts = fs.OpenTextFile(LogFile,ForWriting,True,-1) 
    ts.WriteLine strText 
    ts.Close 
End Sub 
'***************************************************************** 

编辑29/10/2017

代码更新,弹出显示当天

Option Explicit 
Dim URL,fso,ws,LogFile,sSrcUrl,oHTTP,bGetAsAsync,HTA,Data 
Set fso = CreateObject("Scripting.FileSystemObject") 
Set ws = CreateObject("Wscript.Shell") 
LogFile = Left(Wscript.ScriptFullName,InstrRev(Wscript.ScriptFullName, ".")) & "hta" 
if fso.FileExists(LogFile) Then 
    fso.DeleteFile LogFile 
end If 

If IsInternetConnected = True Then 
    If Lang = True Then 
     sSrcUrl = "https://fr.giveawayoftheday.com/" 
    Else 
     sSrcUrl = "https://www.giveawayoftheday.com/" 
    End if 
End If 

Set oHTTP = CreateObject("MSXML2.ServerXMLHTTP.6.0") 
bGetAsAsync = False 
oHTTP.open "GET", sSrcUrl, bGetAsAsync 
oHTTP.send 
If oHTTP.status <> 200 Then 
WScript.Echo "unexpected status = " & oHTTP.status & vbCrLf & oHTTP.statusText 
WScript.Quit 
End If 
Data = oHTTP.responseText 
HTA = "<html>" & vbCrLf &_ 
"<title>Giveaway of the day by Hackoo</title>" & vbCrLf &_ 
"<head>" & vbCrLf &_ 
"<HTA:APPLICATION" & vbCrLf &_ 
    "APPLICATIONNAME=""GiveAway of the Day""" & vbCrLf &_ 
    "Icon=DxDiag.exe" & vbCrLf &_ 
    "BORDER=""thin""" & vbCrLf &_ 
    "MAXIMIZEBUTTON=""no""" & vbCrLf &_ 
    "MINIMIZEBUTTON=""no""" & vbCrLf &_ 
    "SCROLL=""no""" & vbCrLf &_ 
    "SINGLEINSTANCE=""yes""" & vbCrLf &_ 
    "CONTEXTMENU=""no""" & vbCrLf &_ 
    "SELECTION=""no""/>" & vbCrLf &_ 
"<SCRIPT language=""VBScript"">" & vbCrLf &_ 
"Sub Window_OnLoad" & vbCrLf &_ 
    "window.resizeTo 450,380" & vbCrLf &_ 
    "WindowLeft = (window.screen.availWidth - 450)" & vbCrLf &_ 
    "WindowTop = (window.screen.availHeight - 380)" & vbCrLf &_ 
    "window.moveTo WindowLeft, WindowTop" & vbCrLf &_ 
"end sub" & vbCrLf &_ 
"</script>" & vbCrLf &_ 
"</head>" & vbCrLf &_ 
"<center>" & vbCrLf &_ 
"<meta http-equiv=""Content-Type"" content=""text/html; charset=UTF-8"" />" & vbCrLf &_ 
"<meta http-equiv=""X-UA-Compatible"" content=""IE=edge"">" & vbCrLf &_ 
"<link rel=""stylesheet"" href=""https://www.giveawayoftheday.com/css/main.css"" />" 
WriteLog HTA,LogFile 
WriteLog Extract(Data),LogFile 
WriteLog "</html>",LogFile 
ws.run LogFile 
'**************************************************************** 
Function Extract(Data) 
    Dim oRE,oMatches,Match,Line 
    set oRE = New RegExp 
    oRE.IgnoreCase = True 
    oRE.Global = True 
    oRE.MultiLine = True 
    oRE.Pattern = "<div class=""giveaway_wrap cf"">(?:(?!""giveaway_counter first"">)[\s\S])*</div>" 
    set oMatches = oRE.Execute(Data) 
    If not isEmpty(oMatches) then 
     For Each Match in oMatches 
      Line = Match.Value 
      Extract = Line 
     Next 
    End if 
End Function 
'***************************************************************** 
Sub WriteLog(strText,LogFile) 
    Dim fs,ts 
    Const ForAppending = 8 
    Set fs = CreateObject("Scripting.FileSystemObject") 
    Set ts = fs.OpenTextFile(LogFile,ForAppending,True,-1) 
    ts.WriteLine strText 
    ts.Close 
End Sub 
'***************************************************************** 
Function Lang() 
Dim sComputer,oWMI,colOperatingSystems,oOS,iOSLang 
    sComputer = "." 
    Set oWMI = GetObject("winmgmts:" _ 
     & "{impersonationLevel=impersonate}!\\" _ 
     & sComputer _ 
     & "\root\cimv2") 
Set colOperatingSystems = oWMI.ExecQuery _ 
     ("Select * from Win32_OperatingSystem") 
For Each oOS in colOperatingSystems 
    iOSLang = oOS.OSLanguage 
Next 
If (iOSLang = 1036) Then 
    Lang = True 
Else 
    Lang = False 
End If 
End Function 
'***************************************************************** 
Function IsInternetConnected() 
Dim MyLoop,strComputer,objPing,objStatus 
IsInternetConnected = False 
MyLoop = True 
While MyLoop = True 
    strComputer = "smtp.gmail.com" 
    Set objPing = GetObject("winmgmts:{impersonationLevel=impersonate}!\\").ExecQuery _ 
    ("select * from Win32_PingStatus where address = '" & strComputer & "'") 
    For Each objStatus in objPing 
     If objStatus.Statuscode = 0 Then 
      MyLoop = False 
      IsInternetConnected = True 
      Exit Function 
     End If 
    Next 
    MsgBox "Check your internet connection !",vbExclamation,"Check your internet connection !" 
    Pause(10) 'To sleep for 10 secondes 
Wend 
End Function 
'****************************************************************** 
Sub Pause(NSeconds) 
    Wscript.Sleep(NSeconds*1000) 
End Sub 
'****************************************************************** 

enter image description here

+0

真棒排除模式! – omegastripes

+0

@omegastripes检查更新的版本==>更新代码以弹出显示当天赠品的HTA文件 – Hackoo