Win32 and Excel ranges
Mike Brenner
mikeb at mitre.org
Fri Oct 18 15:03:51 EDT 2002
More information about the Python-list mailing list
Fri Oct 18 15:03:51 EDT 2002
- Previous message (by thread): Win32 and Excel ranges
- Next message (by thread): Ignoring comments - Parsing input file
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
Larry Whitley <ldwhitley at removeme.charter.net> wrote: > I've been going through Mark Hammond's book doing > his Python/Excel examples. It's working nicely but > I'm dissapointed with the performance of storing a > lot of data in the spread sheet cell by cell. It would be nice to access the spreadsheet a cell at a time, because that is our primary paradigms of spreadsheets, but the current COM implementation in Python makes that prohibitively expensive. One would have thought that it would have the same cost for Visual Basic and for Python, but Python has about the same cost for extracting one number from an Excel spreadsheet as it does for extracting 50,000 numbers, so you might as well take the whole spreadsheet at one time and do all the processing in Python. It is worse for extracting colors than than for extracting numbers because ranges can't be used. To extract the colors, I had python generate the following Visual Basic macro to place the colors (as numbers) into the cells, 100 columns to the right of the spreadsheet itself. After running this macro, a single Python call can extract the colors from those second hundred columns. (I used "rip" in the code at the bottom of this message. Sub Macro1() Dim offset As Integer Dim row As Integer Dim col As Integer Sheets("Artificial Placenta Electrophersis").Select Range("CU1:HA2100").Select Selection.ClearContents offset = 100 last_row = 1171 last_col = 75 For row = 1 To last_row For col = 1 To last_col Cells(row, col + offset).Value = Cells(row, col).Interior.Color Next col Next row End Sub Paul Casteels wrote: > This is something that works for me (using NumPy for the arrays) : > > xlApp = Dispatch("Excel.Application") > xlApp.Workbooks.Add() > xSheet = xlApp.ActiveWorkbook.ActiveSheet > xSheet.Range(xSheet.Cells(row,col), \ > xSheet.Cells(row+aLen-1,col+1)).Value = array > > My array is 2*8196 and the speed of this 1 line is also very low. > Does someone have other suggestions for speeding this up ? Here is the code that shows it also works with ordinary Python arrays. The RANGE is much faster than doing it one cell at a time. # junk1.py # create a dummy excel file called C:\\junk1.xls before running this from win32com.client import Dispatch xLApp = Dispatch("Excel.Application") xLBook = xLApp.Workbooks.Open("c:\\junk1.xls") xLBook.Worksheets("Sheet1").Select() xSheet=xLBook.ActiveSheet row=1; col=1; aLen=5 array=(("row 1 col A","row 1 col B"), ("row 2 col A","row 2 col B"), ("row 3 col A","row 3 col B"), ("row 4 col A","row 4 col B"), ("row 5 col A","row 5 col B")) xSheet.Range(xSheet.Cells(row,col), \ xSheet.Cells(row+aLen-1,col+1)).Value = array xLBook.Close(SaveChanges=1) # end of junk.py ================================================== # COMtools.py # import string import traceback import tools import types def com_project_saveas_example(directory): """ com_project_saveas_html_example saves the PROJECT files in a directory as html """ log("attempting to bring up PROJECT") global pApp from win32com.client import Dispatch pApp=Dispatch("MSProject.Application") log("brought up Project") pApp.Visible=1 os.chdir(directory) # pApp.ChangeFileOpenDirectory(directory) # for the button for pFile in os.listdir(os.getcwd()): if string.find(pFile,"mpp")>1: csvFile=pFile+".csv" pApp.Add(pFile) # pApp.Documents.Add(pFile) also fails pApp.ActiveDocument.SaveAs(csvFile,17) pApp.ActiveDocument.Close() print "Saved "+pFile+" as "+csvFile pApp.Quit() def com_word2html(directory, files): """ This works for Word 2000, but Word 2000 gives all sorts of complex htmls and xmls instead of a simple, naive html like Word 97 does. If you have both installed, then you have to use VB to get W97. """ # e.g. 3.5 inches=216 points from win32com.client import Dispatch import os os.chdir(directory) W = Dispatch("Word.Application") W.visible=1 W.ChangeFileOpenDirectory(directory) for file in files.keys(): infile,outfile,shortName=files[file] print "saving",file,"in=",infile,"out=",outfile Doc = W.Documents.Open(infile, ReadOnly=1) Doc.SaveAs(FileName=outfile, FileFormat=8) # web format print " SAVED AS WEB PAGE" Doc.Close() W.Quit() def com_word_saveas_html_example(directory): """ com_word_saveas_html_example saves the WORD files in a directory as html """ log("attempting to bring up Word") global wApp from win32com.client import Dispatch wApp=Dispatch("Word.Application") log("brought up Word") wApp.Visible=1 os.chdir(directory) wApp.ChangeFileOpenDirectory(directory) # for the button for WordFile in os.listdir(os.getcwd()): if string.find(WordFile,"doc")>1: htmlfile=WordFile+".html" wApp.Documents.Add(WordFile) wApp.ActiveDocument.SaveAs(htmlfile,17) wApp.ActiveDocument.Close() print "Saved "+WordFile+" as html "+htmlfile wApp.Quit() def com_zz_done(): from win32com.test.util import CheckClean CheckClean() import pythoncom pythoncom.CoUninitialize() from win32com.client import Dispatch import os class pyExcel: def __init__(self): self.filename=None self.currentSheet="No Sheet" try: self.xL = Dispatch("Excel.Application") except: raise "could not dispatch Excel.Application" def openFile(self,thisFile=None,closeFirst=1,visible=1, tracingCloses=0,tracingOpens=0): if self.filename!=None: if self.filename==thisFile: return elif closeFirst: if tracingCloses: print "closing Excel file",self.filename self.closeFile() if thisFile: self.filename=thisFile try: print "opening ",self.filename self.xLBook=self.xL.Workbooks.Open(thisFile) except: raise "could not open Excel workbook ["+thisFile+"]" else: self.filename="" try: self.xLBook=self.xL.Workbooks.Add() self.filename="unsaved, unnamed Excel Workbook" except: raise "could not open a blank Excel sheet" self.xL.Visible = visible def selectSheet(self,thisFile,thisSheet): self.openFile(thisFile) self.mbo(); self.currentSheet="" try: self.xLBook.Worksheets(thisSheet).Select() if 0: self.xL.Sheets(name).Select() except: traceback.print_stack() raise "problem selecting Excel sheet "+thisSheet+" in "+thisFile self.currentSheet=thisSheet def closeFile(self): self.mbo() if self.filename: try: self.xLBook.Close(SaveChanges=0) except: print "problem closing Excel file "+self.filename self.filename=None del self.xLBook def close(self): if self.filename!=None: self.closeFile() self.xL.Quit() del self.xL # # Utility Methods # def mbo(self): if self.filename==None: raise "must be open: Open a spreadsheet first" # # Spreadsheet Operations # def cell(self,row,col): # WARNING: This is VERY slow. Use rip to get # a whole block at a time. This procedure takes # the same amount of time on ONE cell as # RIP takes on a whole spreadsheet with 10,000 # cells, because the COM overhead is constant # and VERY high. self.mbo() try: u=self.xL.ActiveSheet.Cells(row,col).Value except: traceback.print_stack() print print "COULD NOT GET CELL(row=",row,", col=",col,\ "from sheet",self.currentSheet raise return self.strip(u) def setCell(self,row,col,value): self.mbo() self.xL.ActiveSheet.Cells(row,col).Value=value if 0: self.xL.ActiveWorkbook.ActiveSheet.Cells(row,col).Value = value def cell_color(self,row,col): # WARNING: EXTREMELY slow. Instead, use a VB macro # to put the COLORS into a different block of cells # as the VALUE of those new cells. Then read in the # values of those new cells in a single block using rip. self.mbo() return self.xL.ActiveSheet.Cells(row,col).Interior.Color def cell_border(self,row,col): # WARNING: EXTREMELY slow. Instead, use a VB macro # to put the BORDERS into a different block of cells # as the VALUE of those new cells. Then read in the # values of those new cells in a single block using rip. self.mbo() return self.xL.ActiveSheet.Cells(row,col).Borders(9).Color def rip(self, fileName, sheetName, rectangle): row1, col1, row2, col2=rectangle self.selectSheet(fileName, sheetName) self.mbo() S=self.xL.ActiveWorkbook.ActiveSheet x=S.Range(S.Cells(row1,col1),S.Cells(row2,col2)).Value2 # Value2 turns data objects into floating point numbers try: L=len(x) except TypeError: print print "COMtools.rip: Got a TypeError taking len(x)," print " the array coming back from Range.Value" print "type(x)=",type(x) traceback.print_stack() print "x=",x raise y=[] for row in range(len(x)): R=[element for element in x[row]] y.append(map(tools.phrase_unicode2string, R)) if row1==row2: return y[0] # one-dimensional array (a row) if col1==col2: return [y[i][0] for i in range(len(y))] # one_dimensional array (a column) return y # two_dimensional array def save(self): self.mbo() self.xLBook.Save() def saveAs(self,filename): self.mbo() self.filename=filename self.xLBook.SaveAs(filename)
- Previous message (by thread): Win32 and Excel ranges
- Next message (by thread): Ignoring comments - Parsing input file
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the Python-list mailing list