BBT wrote: > I am trying to parse a word (.docx) for tables, then copy these tables > over to excel using xlsxwriter. This is my code: > > from docx.api import Document > import xlsxwriter > > document = Document('/Users/xxx/Documents/xxx/Clauses Sample - Copy v1 - > for merge.docx') tables = document.tables > > wb = xlsxwriter.Workbook('C:/Users/xxx/Documents/xxx/test clause > retrieval.xlsx') Sheet1 = wb.add_worksheet("Compliance") > index_row = 0 > > print(len(tables)) > > for table in document.tables: > data = [] > keys = None > for i, row in enumerate(table.rows): > text = (cell.text for cell in row.cells) > > if i == 0: > keys = tuple(text) > continue > row_data = dict(zip(keys, text)) > data.append(row_data) > #print (data) > #big_data.append(data) > Sheet1.write(index_row,0, str(row_data)) > index_row = index_row + 1 > > print(row_data) > > wb.close() > > > This is my desired output: https://i.stack.imgur.com/9qnbw.png > > However, here is my actual output: https://i.stack.imgur.com/vpXej.png > > I am aware that my current output produces a list of string instead. > > Is there anyway that I can get my desired output using xlsxwriter?
I had to simulate docx.api. With that caveat the following seems to work: import xlsxwriter # begin simulation of # from docx.api import Document class Cell: def __init__(self, text): self.text = text class Row: def __init__(self, cells): self.cells = [Cell(c) for c in cells] class Table: def __init__(self, data): self.rows = [ Row(row) for row in data ] class Document: def __init__(self): self.tables = [ Table([ ["Hello", "Test"], ["est", "ing"], ["gg", "ff"] ]), Table([ ["Foo", "Bar", "Baz"], ["ham", "spam", "jam"] ]) ] document = Document() # end simulation wb = xlsxwriter.Workbook("tmp.xlsx") sheet = wb.add_worksheet("Compliance") offset = 0 for table in document.tables: for y, row in enumerate(table.rows): for x, cell in enumerate(row.cells): sheet.write(y + offset, x, cell.text) offset += len(table.rows) + 1 # one empty row between tables wb.close() -- https://mail.python.org/mailman/listinfo/python-list