Excel – VBA code to read tables from Word document


Need help to modify this VBA code to read multiple tables from a Word document. It only reads one table, but I would like to import more than one into the same Excel sheet.

Sub ImportWordTables()
   'Imports a table from Word document 

   Dim wdDoc         As Object
   Dim wdFileName    As Variant
   Dim TableNo       As Integer  'number of tables in Word doc
   Dim iTable        As Integer  'table number index
   Dim iRow          As Long     'row index in Excel
   Dim iCol          As Integer  'column index in Excel

   wdFileName = Application.GetOpenFilename("Word files (*.doc*),*.doc*", , _
     "Browse for file containing table to be imported")

   If wdFileName = False Then Exit Sub '(user cancelled import file browser)

   Set wdDoc = GetObject(wdFileName)   'open Word file

   With wdDoc
      TableNo = wdDoc.tables.Count
      If TableNo = 0 Then
         MsgBox "This document contains no tables", _
         vbExclamation, "Import Word Table"
      ElseIf TableNo > 1 Then
         TableNo = InputBox("This Word document contains " & TableNo & " tables." & vbCrLf & _
         "Enter table number of table to import", "Import Word Table", "1")
      End If
      With .tables(TableNo)
         'copy cell contents from Word table cells to Excel cells
         For iRow = 1 To .Rows.Count
            For iCol = 1 To .Columns.Count
               Cells(iRow, iCol) = WorksheetFunction.Clean(.cell(iRow, iCol).Range.Text)
            Next iCol
         Next iRow
      End With
   End With
   Set wdDoc = Nothing       
End Sub

Best Solution

So this is the code, but it doesn't entirely answer my questions. I just need the tables from the pdf.

Sub Imp_Into_XL(PDF_File As String, Each_Sheet As Boolean)

'This procedure get the PDF data into excel by following way

'1.Open PDF file
'2.Looping through pages
'3.get the each PDF page data into individual _
  sheets or single sheet as defined in Each_Sheet Parameter

Dim AC_PD As Acrobat.AcroPDDoc              'access pdf file
Dim AC_Hi As Acrobat.AcroHiliteList         'set selection word count
Dim AC_PG As Acrobat.AcroPDPage             'get the particular page
Dim AC_PGTxt As Acrobat.AcroPDTextSelect    'get the text of selection area

Dim WS_PDF As Worksheet
Dim RW_Ct As Long                           'row count
Dim Col_Num As Integer                      'column count
Dim Li_Row As Long                          'Maximum rows limit for one column
Dim Yes_Fir As Boolean                      'to identify beginning of page

Li_Row = Rows.Count

Dim Ct_Page As Long                         'count pages in pdf file
Dim i As Long, j As Long, k As Long         'looping variables
Dim T_Str As String

Dim Hld_Txt As Variant                      'get PDF total text into array

RW_Ct = 0                                   'set the intial value
Col_Num = 1                                 'set the intial value

Application.ScreenUpdating = False

Set AC_PD = New Acrobat.AcroPDDoc
Set AC_Hi = New Acrobat.AcroHiliteList

'set maximum selection area of PDF page

AC_Hi.Add 0, 32767

With AC_PD

    'open PDF file

    .Open PDF_File

    'get the number of pages of PDF file

    Ct_Page = .GetNumPages

    'if get pages is failed exit sub

    If Ct_Page = -1 Then
        MsgBox "Pages Cannot determine in PDF file '" & PDF_File & "'"
        GoTo h_end
    End If

    'add sheet only one time if Data retrive in one sheet

    If Each_Sheet = False Then
        Set WS_PDF = Worksheets.Add(, Worksheets(Sheets.Count))
        WS_PDF.Name = "PDF3Text"
    End If

    'looping through sheets

    For i = 1 To Ct_Page

        T_Str = ""
        'get the page
        Set AC_PG = .AcquirePage(i - 1)

        'get the full page selection
        Set AC_PGTxt = AC_PG.CreateWordHilite(AC_Hi)

        'if text selected successfully get the all the text into T_Str string

        If Not AC_PGTxt Is Nothing Then

            With AC_PGTxt

                For j = 0 To .GetNumText - 1
                    T_Str = T_Str & .GetText(j)
                Next j

            End With

        End If

        If Each_Sheet = True Then

            'add each sheet for each page

            Set WS_PDF = Worksheets.Add(, Worksheets(Sheets.Count))

        End If

        'transfer PDF data into sheet

        With WS_PDF

            If Each_Sheet = True Then

                .Name = "Page-" & i

                'get the PDF data into each sheet for each PDF page

                'if text accessed successfully then split T_Str by VbCrLf
                'and get into array Hld_Txt and looping through array and fill sheet with PDF data

                If T_Str <> "" Then
                    Hld_Txt = Split(T_Str, vbCrLf)

                    For k = 0 To UBound(Hld_Txt)
                        T_Str = CStr(Hld_Txt(k))
                        If Left(T_Str, 1) = "=" Then T_Str = "'" & T_Str
                        .Cells(k + 1, 1).Value = T_Str
                    Next k

                            'information if text not retrive from PDF page

                            .Cells(1, 1).Value = "No text found in page " & i
                End If


                        'get the pdf data into single sheet

                        If T_Str <> "" Then
                            Hld_Txt = Split(T_Str, vbCrLf)

                            Yes_Fir = True

                            For k = 0 To UBound(Hld_Txt)

                                RW_Ct = RW_Ct + 1

                                'check begining of page if yes enter PDF page number for any idenfication

                                If Yes_Fir Then
                                    RW_Ct = RW_Ct + 1
                                    .Cells(RW_Ct, Col_Num).Value = "Text In Page - " & i
                                    RW_Ct = RW_Ct + 2
                                    Yes_Fir = False
                                End If

                                'check for maximum rows if exceeds start from next column

                                If RW_Ct > Li_Row Then
                                    RW_Ct = 1
                                    Col_Num = Col_Num + 1
                                End If

                                T_Str = CStr(Hld_Txt(k))
                                If Left(T_Str, 1) = "=" Then T_Str = "'" & T_Str
                                .Cells(RW_Ct, Col_Num).Value = T_Str

                            Next k


                                    RW_Ct = RW_Ct + 1
                                    .Cells(RW_Ct, Col_Num).Value = "No text found in page " & i
                                    RW_Ct = RW_Ct + 1

                        End If

            End If

        End With
    Next i


End With

Application.ScreenUpdating = True

MsgBox "Imported"


Set WS_PDF = Nothing
Set AC_PGTxt = Nothing
Set AC_PG = Nothing
Set AC_Hi = Nothing
Set AC_PD = Nothing

End Sub
Related Question