Home » excel » html – Data Scraping Elements By ClassName

html – Data Scraping Elements By ClassName

Posted by: admin May 14, 2020 Leave a comment

Questions:

I am trying to pull data from a web site. I want to copy each unit size, e.g. ‘10’ x 5’’ (class name “unit_size medium”), into column 1, which I am able to do successfully. I also want the promo, e.g. ‘1st Month Free!’ (class name “promo_offers”), in column 2. The problem is that the promo is given only for some units, so the data ends up misaligned: I get promos in the first 4 cells and then an error. I want to copy the promo only for those units where promo information is given; otherwise the cell should be blank or set to some other value. Below is the code.

Please suggest how to frame the code.

Sub GetClassNames()
    ' Opens the storage-unit listing page in Internet Explorer and, for every element whose
    ' class is "unit_size medium", appends a row holding the unit size (column 1) and a promo
    ' text (column 2).
    '
    ' NOTE(review): this is the code as posted in the question; it still contains the
    ' misalignment problem the question describes (see the note on the promo lookup below).

    Dim html As HTMLDocument

    Dim objIE As Object
    Dim element As IHTMLElement
    Dim ie As InternetExplorer          ' declared but never used in this procedure
    Dim elements As IHTMLElementCollection
    Dim result As String ' declared but never used in this procedure

    Dim count As Long
    Dim erow As Long

    'initiating a new instance of Internet Explorer and assigning it to objIE
    Set objIE = New InternetExplorer

    'make IE browser visible (False would allow IE to run in the background)
    objIE.Visible = True

    'navigate IE to the storage-unit listing page
    objIE.navigate "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"

    'loop (yielding to the event queue) until IE is no longer busy and readyState is 4
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
    ' count is the zero-based index of the unit currently being written
    count = 0

    Set html = objIE.document
    Set elements = html.getElementsByClassName("unit_size medium")

    For Each element In elements
        ' only handle elements whose class attribute is exactly "unit_size medium"
        If element.className = "unit_size medium" Then
            ' next free row, determined from column 1 of Sheet2
            erow = Sheet2.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
            ' NOTE(review): Cells is unqualified here while erow was computed from Sheet2;
            ' presumably Sheet2 is expected to be the active sheet - confirm
            Cells(erow, 1) = html.getElementsByClassName("unit_size medium")(count).innerText

            ' NOTE(review): the promo is taken from the page-wide "promo_offers" collection
            ' using the unit index. Per the question, only some units have a promo, so the
            ' n-th promo is not necessarily the n-th unit's promo, and the lookup fails once
            ' count reaches the number of promos on the page.
            Cells(erow, 2) = html.getElementsByClassName("promo_offers")(count).innerText
            count = count + 1      
        End If
    Next element
End Sub
Answers:

I would simply wrap each attempt to access an element in On Error Resume Next. Reserve a slot for every field in an output array up front, so that if an element (such as the promo) is not present for a listing, its slot simply remains empty.

Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetData()
    ' Scrapes every <li> listing inside the element with id "small_units_accordion_panel"
    ' and writes one row per listing to Sheet1 of this workbook, below a header row.
    '
    ' Columns written: size, features, promo, in store, web.
    ' A field that is missing from a listing (e.g. a unit without a promo) is left empty
    ' in that listing's row instead of stopping the macro.
    '
    ' Requires a reference to "Microsoft Internet Controls".
    Dim ie As New InternetExplorer, ws As Worksheet
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    With ie
        .Visible = True
        .Navigate2 "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"

        ' Yield to the event queue until the page has finished loading (readyState 4)
        While .Busy Or .readyState < 4: DoEvents: Wend

        Dim panel As Object, listings As Object, listing As Object
        Dim headers(), results(), r As Long, colCount As Long
        headers = Array("size", "features", "promo", "in store", "web")
        ' Derive the column count from both bounds so it is right under any Option Base
        colCount = UBound(headers) - LBound(headers) + 1
        ws.Cells(1, 1).Resize(1, colCount) = headers

        ' Class names read per listing:
        ' unit_size medium, features, promo_offers, board_rate, price
        Set panel = .document.getElementById("small_units_accordion_panel")
        If Not panel Is Nothing Then
            Set listings = panel.getElementsByTagName("li")

            ' ReDim with bounds "1 To 0" raises run-time error 9, so only size and fill
            ' the output array when at least one listing was found
            If listings.Length > 0 Then
                ReDim results(1 To listings.Length, 1 To colCount)
                For Each listing In listings
                    r = r + 1
                    ' Deliberately skip any field whose element is absent from this listing;
                    ' the pre-sized slot in results() then stays Empty
                    On Error Resume Next
                    results(r, 1) = listing.getElementsByClassName("unit_size medium")(0).innerText
                    results(r, 2) = listing.getElementsByClassName("features")(0).innerText
                    results(r, 3) = listing.getElementsByClassName("promo_offers")(0).innerText
                    results(r, 4) = listing.getElementsByClassName("board_rate")(0).innerText
                    results(r, 5) = listing.getElementsByClassName("price")(0).innerText
                    On Error GoTo 0
                Next
                ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
            End If
        End If

        ' Always reached, so the browser is closed even when nothing was scraped
        .Quit
    End With
End Sub

To cover all boxes (every “main_unit” section on the page rather than only the small-units panel):

Option Explicit

'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetData()
    ' Scrapes every <li> listing inside every element of class "main_unit" and writes one
    ' row per listing to Sheet1 of this workbook, below a header row.
    '
    ' Columns written: size, features, promo, in store, web.
    ' A field that is missing from a listing (e.g. a unit without a promo) is left empty
    ' in that listing's row instead of stopping the macro.
    '
    ' Requires a reference to "Microsoft Internet Controls".
    Dim ie As New InternetExplorer, ws As Worksheet
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    With ie
        .Visible = True
        .Navigate2 "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"

        ' Yield to the event queue until the page has finished loading (readyState 4)
        While .Busy Or .readyState < 4: DoEvents: Wend

        Dim listing As Object, headers(), results()
        Dim r As Long, list As Object, item As Object
        Dim rowCount As Long, colCount As Long
        headers = Array("size", "features", "promo", "in store", "web")
        ' Derive the column count from both bounds so it is right under any Option Base
        colCount = UBound(headers) - LBound(headers) + 1
        ws.Cells(1, 1).Resize(1, colCount) = headers

        ' Class names read per listing:
        ' unit_size medium, features, promo_offers, board_rate, price
        Set list = .document.getElementsByClassName("main_unit")

        ' Total number of <li> items across all main_unit sections, used to size results()
        rowCount = .document.querySelectorAll(".main_unit li").Length

        ' ReDim with bounds "1 To 0" raises run-time error 9, so only size and fill the
        ' output array when at least one listing was found
        If rowCount > 0 Then
            ReDim results(1 To rowCount, 1 To colCount)
            For Each listing In list
                For Each item In listing.getElementsByTagName("li")
                    r = r + 1
                    ' Deliberately skip any field whose element is absent from this item;
                    ' the pre-sized slot in results() then stays Empty
                    On Error Resume Next
                    results(r, 1) = item.getElementsByClassName("unit_size medium")(0).innerText
                    results(r, 2) = item.getElementsByClassName("features")(0).innerText
                    results(r, 3) = item.getElementsByClassName("promo_offers")(0).innerText
                    results(r, 4) = item.getElementsByClassName("board_rate")(0).innerText
                    results(r, 5) = item.getElementsByClassName("price")(0).innerText
                    On Error GoTo 0
                Next
            Next
            ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
        End If

        ' Always reached, so the browser is closed even when nothing was scraped
        .Quit
    End With
End Sub