libreoffice python 操作word及excel文档的方法
1、开始、关闭libreoffice服务;
开始之前同步字体文件时间,是因为创建soffice服务时,服务会检查所需加载的文件的时间,如果其认为时间不符,则其可能会重新加载,耗时较长,因此需事先统一时间。
使用时如果需要多次调用,最好每次调用都先开启服务、用完后立即关闭,否则libreoffice会创建一个缓存文档并越用越大,处理时间会随之增加。
class OfficeProcess(object):
    """Start and stop a ``soffice`` process that accepts UNO socket connections.

    The process listens on localhost:2002 (URP) and writes its process id to
    ``sof.pid`` in the current working directory; ``stop_office`` reads that
    file back to terminate it.
    """

    def __init__(self):
        # Handle of the launched soffice shell; stays 0 until start_office().
        self.p = 0
        # Give every font file the same modification time.  According to the
        # accompanying article, soffice checks the times of the files it loads
        # and may reload them (slowly) when they look inconsistent.
        # NOTE(review): this command is not waited for, so it may still be
        # running when start_office() is called -- confirm that is acceptable.
        subprocess.Popen(
            'find /usr/share/fonts | xargs touch -m -t 201801010000.00',
            shell=True)

    def start_office(self):
        """Launch soffice and block until its UNO socket can be resolved.

        Retries once per second, forever, until the connection succeeds.
        """
        self.p = subprocess.Popen(
            'soffice --pidfile=sof.pid --invisible '
            '--accept="socket,host=localhost,port=2002;urp;"',
            shell=True)
        while True:
            try:
                local_context = uno.getComponentContext()
                resolver = local_context.getServiceManager().createInstanceWithContext(
                    'com.sun.star.bridge.UnoUrlResolver', local_context)
                resolver.resolve(
                    'uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext')
                return
            except Exception:
                # Not listening yet.  ``Exception`` (rather than a bare
                # except) keeps Ctrl-C able to break out of the wait loop.
                print(ts(), "waitforconnectingsoffice...")
                time.sleep(1)
                continue

    def stop_office(self):
        """Send SIGTERM to the pid recorded in ``sof.pid`` and wait for exit.

        Raises whatever ``open`` raises when ``sof.pid`` does not exist; every
        other failure (bad pid, process already gone, never started) is
        ignored on purpose because shutdown is best effort.
        """
        with open("sof.pid", "rb") as f:
            try:
                os.kill(int(f.read()), signal.SIGTERM)
                self.p.wait()
            except Exception:
                pass
2、初始化 service manager(init service manager)
# Connect to the soffice listener on localhost:2002 and keep the remote
# component context, its service manager and a Desktop instance on ``self``.
local_context = uno.getComponentContext()
service_manager = local_context.getServiceManager()
resolver = service_manager.createInstanceWithContext(
    'com.sun.star.bridge.UnoUrlResolver', local_context)
self.ctx = resolver.resolve(
    'uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext')
self.smgr = self.ctx.ServiceManager
self.desktop = self.smgr.createInstanceWithContext(
    'com.sun.star.frame.Desktop', self.ctx)
3、从二进制数据中读取doc文档
def ImportFromMemory(self, data):
    """Load a Writer document from in-memory bytes.

    Wraps ``data`` in a SequenceInputStream and hands it to the desktop via
    the ``InputStream`` load property.  Resets ``self.doc`` to an empty
    result, and sets ``self.document`` / ``self.text`` on success.  On any
    load failure ``self.text`` is set to None instead of raising.
    """
    istream = self.smgr.createInstanceWithContext(
        'com.sun.star.io.SequenceInputStream', self.ctx)
    istream.initialize((uno.ByteSequence(data),))

    pv = PropertyValue()
    pv.Name = 'InputStream'
    pv.Value = istream

    self.doc = {'doc': []}
    try:
        self.document = self.desktop.loadComponentFromURL(
            'private:stream/swriter', '_blank', 0, (pv,))
        self.text = self.document.getText()
    except Exception:
        # Covers UNO load errors as well as a None document.
        self.text = None
4、读取doc文档中的数据
def ExportToJson(self):
    """Serialize the loaded document to a JSON string.

    Parses ``self.text`` into ``self.doc['doc']`` and stores the accumulated
    text length under ``'length'``.  If parsing fails for any reason the
    result falls back to an empty document of length 0.
    """
    try:
        total = self.__ParseText(self.text, self.__Callback(self.doc['doc']))
        self.doc['length'] = total
    except Exception:
        self.doc = {'doc': [], 'length': 0}
    return json.dumps(self.doc)


@staticmethod
def __Callback(alist):
    """Return a one-argument function that appends its argument to ``alist``."""
    def Append(sth):
        alist.append(sth)
    return Append
def __ParseText(self, text, func):
    """Walk the top-level elements of ``text`` and return their total length.

    Paragraphs and text tables are parsed (each reports itself through
    ``func``); every other element kind is skipped.
    """
    total = 0
    text_it = text.createEnumeration()
    while text_it.hasMoreElements():
        element = text_it.nextElement()
        if element.supportsService('com.sun.star.text.Paragraph'):
            total += self.__ParseParagraph(element, func)
        elif element.supportsService('com.sun.star.text.TextTable'):
            total += self.__ParseTable(element, func)
    return total
def __ParseParagraph(self, paragraph, func):
    """Parse one paragraph and report ``{'paragraph': [...], 'length': n}``.

    Text and text-field portions are recorded as text; soft page breaks are
    skipped; any other portion type is searched for embedded text content.
    Returns the accumulated text length.
    """
    p = {'paragraph': []}
    emit = self.__Callback(p['paragraph'])
    total = 0
    paragraph_it = paragraph.createEnumeration()
    while paragraph_it.hasMoreElements():
        portion = paragraph_it.nextElement()
        if portion.TextPortionType == 'Text':
            total += self.__ParsePortionText(portion, emit)
        elif portion.TextPortionType == 'SoftPageBreak':
            pass
        elif portion.TextPortionType == 'TextField':
            total += self.__ParsePortionText(portion, emit)
        else:
            total += self.__ParseTextContent(portion, emit)
    # NOTE(review): nesting reconstructed from whitespace-stripped source;
    # content anchored at the paragraph itself is collected once, after the
    # portion loop -- confirm against the original file.
    if hasattr(paragraph, 'createContentEnumeration'):
        total += self.__ParseTextContent(paragraph, emit)
    p['length'] = total
    func(p)
    return total


def __ParseTextContent(self, textcontent, func):
    """Parse the TextContent objects attached to ``textcontent``.

    Graphics, text frames and group shapes are parsed; embedded objects and
    anything else are ignored.  Returns the accumulated text length.
    """
    total = 0
    content_it = textcontent.createContentEnumeration('com.sun.star.text.TextContent')
    while content_it.hasMoreElements():
        element = content_it.nextElement()
        if element.supportsService('com.sun.star.text.TextGraphicObject'):
            total += self.__ParsePortionGraphic(element, func)
        elif element.supportsService('com.sun.star.text.TextEmbeddedObject'):
            pass
        elif element.supportsService('com.sun.star.text.TextFrame'):
            total += self.__ParseFrame(element, func)
        elif element.supportsService('com.sun.star.drawing.GroupShape'):
            total += self.__ParseGroup(element, func)
    return total


def __ParseFrame(self, frame, func):
    """Parse the text inside ``frame`` and report ``{'frame': [...], 'length': n}``."""
    f = {'frame': []}
    total = self.__ParseText(frame.getText(), self.__Callback(f['frame']))
    f['length'] = total
    func(f)
    return total


def __ParseGroup(self, group, func):
    """Parse every text-capable shape of a group shape as a frame."""
    total = 0
    for i in range(group.getCount()):
        shape = group.getByIndex(i)
        if shape.supportsService('com.sun.star.drawing.Text'):
            total += self.__ParseFrame(shape, func)
    return total


def __ParsePortionText(self, portion_text, func):
    """Report a text portion as ``{'portion': str, 'length': len}``; return the length."""
    content = portion_text.String
    func({'portion': content, 'length': len(content)})
    return len(content)


def __ParsePortionGraphic(self, portion_graphic, func):
    """Report an embedded graphic as a base64 PNG plus its geometry.

    The graphic is stored as ``image/png`` into a TempFile stream, read back
    and base64-encoded.  Size, actual size and crop values are copied from
    the graphic object.  Always returns 0: an image adds no text length.
    """
    gp = self.smgr.createInstanceWithContext('com.sun.star.graphic.GraphicProvider', self.ctx)
    stream = self.smgr.createInstanceWithContext('com.sun.star.io.TempFile', self.ctx)

    pv1 = PropertyValue()
    pv1.Name = 'OutputStream'
    pv1.Value = stream
    pv2 = PropertyValue()
    pv2.Name = 'MimeType'
    pv2.Value = 'image/png'
    gp.storeGraphic(portion_graphic.Graphic, (pv1, pv2))

    # Rewind before measuring and again before reading the PNG bytes back.
    stream.getOutputStream().flush()
    stream.seek(0)
    size = stream.getInputStream().available()
    buf = uno.ByteSequence(b'')
    stream.seek(0)
    size, buf = stream.getInputStream().readBytes(buf, size)

    img = {'image': base64.b64encode(buf.value).decode('ascii')}
    img['height'] = portion_graphic.Height
    img['width'] = portion_graphic.Width
    img['actualheight'] = portion_graphic.ActualSize.Height
    img['actualwidth'] = portion_graphic.ActualSize.Width
    img['croptop'] = portion_graphic.GraphicCrop.Top
    img['cropbottom'] = portion_graphic.GraphicCrop.Bottom
    img['cropleft'] = portion_graphic.GraphicCrop.Left
    img['cropright'] = portion_graphic.GraphicCrop.Right
    img['length'] = 0
    func(img)
    return 0


def __ParseTable(self, table, func):
    """Parse a text table and report it as a row/column dict of cells.

    Each cell keeps its ``rowspan``, ``colspan`` and ``name`` plus a
    ``content`` list with the parsed cell text.  A table whose spans do not
    add up to rows * columns, or that fails to parse for any other reason,
    is discarded and contributes length 0.
    """
    total = 0
    try:
        matrix = self.__GetTableMatrix(table)
        seps = self.__GetTableSeparators(table)
        t = {}
        count = 0
        for ri in matrix.keys():
            t[ri] = {}
            for ci in matrix[ri].keys():
                # Copy the cell record without the live UNO cell object so
                # the result stays JSON-serializable.
                t[ri][ci] = dict(matrix[ri][ci])
                del t[ri][ci]['cell']
                t[ri][ci]['content'] = []
                total += self.__ParseText(matrix[ri][ci]['cell'],
                                          self.__Callback(t[ri][ci]['content']))
                count += t[ri][ci]['rowspan'] * t[ri][ci]['colspan']
        if count != len(t) * len(seps):
            raise ValueError('countofcellserror')
        # NOTE(review): self.table_id is not initialized anywhere in the
        # visible code; presumably set in __init__ -- otherwise every table
        # ends up in the except branch below.
        func({'table': t, 'row': len(t), 'column': len(seps),
              'length': total, 'tableid': self.table_id})
        self.table_id += 1
    except Exception:
        total = 0
        print('discardwrongtable')
    return total


@staticmethod
def __GetTableSeparators(table):
    """Return the sorted, distinct column separator positions of ``table``.

    Collects the separator positions of every row plus
    ``TableColumnRelativeSum`` (the right edge).  A position that is exactly
    1 below its successor is bumped up so the pair collapses into one.
    """
    result = [table.TableColumnRelativeSum]
    for ri in range(table.getRows().getCount()):
        result += [s.Position for s in table.getRows().getByIndex(ri).TableColumnSeparators]
    result = sorted(set(result))
    for i in range(len(result) - 1):
        result[i] += 1 if result[i] + 1 == result[i + 1] else 0
    return sorted(set(result))


@staticmethod
def __NameToRC(name):
    """Convert a cell name such as ``'B3'`` to zero-based ``(row, column)``.

    Column letters form a bijective base-52 number (A-Z = 0..25,
    a-z = 26..51), so ``'AA'`` is column 52.  The previous accumulation
    treated a leading ``'A'`` as zero and mapped ``'AA'`` to column 0.
    Raises ValueError when the non-letter part is not an integer.
    """
    row = int(re.sub('[A-Za-z]', '', name)) - 1
    letters = re.sub('[0-9]', '', name)
    col = 0
    for ch in letters:
        if 'A' <= ch <= 'Z':
            digit = ord(ch) - ord('A')
        else:
            digit = 26 + ord(ch) - ord('a')
        col = col * 52 + digit + 1
    # A name without letters keeps the historical result of column 0.
    return row, max(col - 1, 0)


@staticmethod
def __GetTableMatrix(table):
    """Build ``{row: {col: {'cell', 'rowspan', 'name', 'colspan'}}}`` for ``table``.

    ``colspan`` is the number of table-wide separator slots between the
    cell's left and right edge.
    """
    result = {}
    for name in table.getCellNames():
        ri, ci = WordToJson.__NameToRC(name)
        cell = table.getCellByName(name)
        if ri not in result:
            result[ri] = {}
        result[ri][ci] = {'cell': cell, 'rowspan': cell.RowSpan, 'name': name}

    seps = WordToJson.__GetTableSeparators(table)
    for ri in result.keys():
        sep = ([s.Position for s in table.getRows().getByIndex(ri).TableColumnSeparators]
               + [table.TableColumnRelativeSum])
        sep = sorted(set(sep))
        for ci in result[ri].keys():
            # A row position missing from ``seps`` was bumped by one in
            # __GetTableSeparators, hence the ``+ 1`` fallback lookups.
            right = seps.index(sep[ci]) if sep[ci] in seps else seps.index(sep[ci] + 1)
            left = (-1 if ci == 0
                    else seps.index(sep[ci - 1]) if sep[ci - 1] in seps
                    else seps.index(sep[ci - 1] + 1))
            result[ri][ci]['colspan'] = right - left
    return result
5、写doc文档
# Create an empty Writer document for output and keep its text object and a
# text cursor on ``self``; the cursor's paragraph bottom margin is set to 500.
self.doco = self.desktop.loadComponentFromURL('private:factory/swriter', '_blank', 0, ())
self.texto = self.doco.getText()
self.cursoro = self.texto.createTextCursor()
self.cursoro.ParaBottomMargin = 500
def __WriteText(self, text, texto, cursoro):
    """Write each item of ``text`` at ``cursoro`` inside ``texto``.

    Items are dispatched on the key they contain: ``'paragraph'``,
    ``'image'`` or ``'table'``.  Items with none of these keys are skipped.
    """
    for it in text:
        if 'paragraph' in it:
            self.__WriteParagraph(it, texto, cursoro)
        elif 'image' in it:
            self.__WritePortionGraphic(it, texto, cursoro)
        elif 'table' in it:
            self.__WriteTable(it, texto, cursoro)


def __WriteParagraph(self, paragraph, texto, cursoro):
    """Insert a non-empty paragraph followed by a paragraph break.

    When the item carries a ``'result'`` list, every entry's ``'trans_sen'``
    is inserted in order; otherwise ``paragraph['paragraph']`` is inserted.
    """
    # NOTE(review): nesting reconstructed from whitespace-stripped source;
    # the paragraph break is assumed to belong to the non-empty case only.
    if paragraph['length'] > 0:
        if 'result' in paragraph:
            for it in paragraph['result']:
                texto.insertString(cursoro, it['trans_sen'], False)
        else:
            # NOTE(review): the exporter stores a list under 'paragraph';
            # presumably the JSON is reshaped to a string before it gets
            # here -- confirm against the caller.
            texto.insertString(cursoro, paragraph['paragraph'], False)
        texto.insertControlCharacter(cursoro, ControlCharacter.PARAGRAPH_BREAK, False)


def __WritePortionGraphic(self, portion_graphic, texto, cursoro):
    """Insert a base64-encoded PNG as a graphic, followed by a paragraph break.

    ``'actualheight'`` / ``'actualwidth'`` fall back to ``'height'`` /
    ``'width'`` and missing crop values fall back to 0.
    """
    png = base64.b64decode(portion_graphic['image'])

    gp = self.smgr.createInstanceWithContext('com.sun.star.graphic.GraphicProvider', self.ctx)
    istream = self.smgr.createInstanceWithContext('com.sun.star.io.SequenceInputStream', self.ctx)
    istream.initialize((uno.ByteSequence(png),))
    pv = PropertyValue()
    pv.Name = 'InputStream'
    pv.Value = istream

    actualsize = uno.createUnoStruct('com.sun.star.awt.Size')
    actualsize.Height = (portion_graphic['actualheight']
                         if 'actualheight' in portion_graphic else portion_graphic['height'])
    actualsize.Width = (portion_graphic['actualwidth']
                        if 'actualwidth' in portion_graphic else portion_graphic['width'])

    graphiccrop = uno.createUnoStruct('com.sun.star.text.GraphicCrop')
    graphiccrop.Top = portion_graphic.get('croptop', 0)
    graphiccrop.Bottom = portion_graphic.get('cropbottom', 0)
    graphiccrop.Left = portion_graphic.get('cropleft', 0)
    graphiccrop.Right = portion_graphic.get('cropright', 0)

    image = self.doco.createInstance('com.sun.star.text.TextGraphicObject')
    image.Surround = NONE
    image.Graphic = gp.queryGraphic((pv,))
    image.Height = portion_graphic['height']
    image.Width = portion_graphic['width']
    image.setPropertyValue('ActualSize', actualsize)
    image.setPropertyValue('GraphicCrop', graphiccrop)

    texto.insertTextContent(cursoro, image, False)
    texto.insertControlCharacter(cursoro, ControlCharacter.PARAGRAPH_BREAK, False)


def __WriteTable(self, table, texto, cursoro):
    """Insert a table of ``table['row']`` x ``table['column']`` cells and fill it.

    Cells with a row or column span above 1 are merged first, then each
    cell's ``'content'`` is written through ``__WriteText``.
    """
    tableo = self.doco.createInstance('com.sun.star.text.TextTable')
    tableo.initialize(table['row'], table['column'])
    texto.insertTextContent(cursoro, tableo, False)
    # No paragraph break is inserted after the table; the original had that
    # call commented out.

    # Probe whether the table cursor can extend a selection downwards.  If
    # the range is still 'A1' afterwards, merging is skipped for the whole
    # table.
    # NOTE(review): presumably a workaround for a LibreOffice cursor bug.
    tcursoro = tableo.createCursorByCellName("A1")
    hitbug = False
    if table['row'] > 1:
        tcursoro.goDown(1, True)
        hitbug = tcursoro.getRangeName() == 'A1'

    # Row and column keys are strings here; visit them in numeric order.
    for ri in sorted(int(r) for r in table['table'].keys()):
        rs = table['table'][str(ri)]
        for ci in sorted(int(c) for c in rs.keys()):
            cell = rs[str(ci)]
            if not hitbug and (cell['rowspan'] > 1 or cell['colspan'] > 1):
                tcursoro.gotoCellByName(cell['name'], False)
                if cell['rowspan'] > 1:
                    tcursoro.goDown(cell['rowspan'] - 1, True)
                if cell['colspan'] > 1:
                    tcursoro.goRight(cell['colspan'] - 1, True)
                tcursoro.mergeRange()
            ctexto = tableo.getCellByName(cell['name'])
            if ctexto is None:
                continue
            ccursoro = ctexto.createTextCursor()
            ccursoro.CharWeight = FontWeight.NORMAL
            ccursoro.CharWeightAsian = FontWeight.NORMAL
            ccursoro.ParaAdjust = LEFT
            self.__WriteText(cell['content'], ctexto, ccursoro)
6、生成二进制的doc文档数据
# Store the output document into an in-memory Pipe as Word 2007 XML and read
# the resulting bytes into ``datao``.
streamo = self.smgr.createInstanceWithContext('com.sun.star.io.Pipe', self.ctx)
self.doco.storeToURL(
    'private:stream',
    (
        # The filter name contains spaces; without them the export filter is
        # not found.
        PropertyValue('FilterName', 0, 'MS Word 2007 XML', 0),
        PropertyValue('OutputStream', 0, streamo, 0),
    ))
streamo.flush()
_, datao = streamo.readBytes(None, streamo.available())
7、从doc文档数据生成pdf的二进制数据
# Store the output document into an in-memory Pipe with the Writer PDF
# export filter and read the resulting bytes into ``datap``.
streamo = self.smgr.createInstanceWithContext('com.sun.star.io.Pipe', self.ctx)
self.doco.storeToURL(
    'private:stream',
    (
        PropertyValue('FilterName', 0, 'writer_pdf_Export', 0),
        PropertyValue('OutputStream', 0, streamo, 0),
    ))
streamo.flush()
_, datap = streamo.readBytes(None, streamo.available())
8、读取excel二进制数据
def ImportFromMemory(self, data):
    """Load a Calc spreadsheet document from in-memory bytes.

    Wraps ``data`` in a SequenceInputStream and hands it to the desktop via
    the ``InputStream`` load property.  Resets ``self.doc`` to an empty
    result, and sets ``self.document`` / ``self.sheets`` on success.  On any
    load failure ``self.sheets`` is set to None instead of raising.
    """
    istream = self.smgr.createInstanceWithContext(
        'com.sun.star.io.SequenceInputStream', self.ctx)
    istream.initialize((uno.ByteSequence(data),))

    pv = PropertyValue()
    pv.Name = 'InputStream'
    pv.Value = istream

    self.doc = {'doc': []}
    try:
        print("beforeloadComponentFromURL")
        self.document = self.desktop.loadComponentFromURL(
            'private:stream/scalc', '_blank', 0, (pv,))
        self.sheets = self.document.getSheets()
        print("ImportFromMemorydone")
    except Exception:
        # Covers UNO load errors as well as a None document.
        print("ImportFromMemoryfailed")
        self.sheets = None
9、读取excel的文本数据
def ExportToJson(self):
    """Serialize the loaded spreadsheet document to a JSON string.

    Parses ``self.sheets`` into ``self.doc['doc']`` and stores the
    accumulated text length under ``'length'``.  If parsing fails for any
    reason the result falls back to an empty document of length 0.
    """
    try:
        total = self.__ParseText(self.sheets, self.__Callback(self.doc['doc']))
        self.doc['length'] = total
    except Exception:
        self.doc = {'doc': [], 'length': 0}
    return json.dumps(self.doc)
def __ParseText(self, sheets, func):
    """Parse every spreadsheet in ``sheets``; return the total text length."""
    total = 0
    sheets_it = sheets.createEnumeration()
    while sheets_it.hasMoreElements():
        element = sheets_it.nextElement()
        if element.supportsService('com.sun.star.sheet.Spreadsheet'):
            total += self.__ParseSpreadsheet(element, func)
    return total


def __ParseSpreadsheet(self, spreadsheet, func):
    """Collect the TEXT cells among the visible cells of one spreadsheet.

    Reports ``{'spreadsheet': [...], 'length': n}`` through ``func``.  Empty,
    numeric and formula cells are only printed for debugging and are not
    exported.  Returns the accumulated text length.
    """
    total = 0
    p = {'spreadsheet': []}
    visible_cells_it = spreadsheet.queryVisibleCells().getCells().createEnumeration()
    while visible_cells_it.hasMoreElements():
        cell = visible_cells_it.nextElement()
        # Named ``cell_type`` so the builtin ``type`` is not shadowed.
        cell_type = cell.getType()
        if cell_type == self.EMPTY:
            print("cell.type==empty")
        elif cell_type == self.VALUE:
            print("cell.type==VALUE", "value=", cell.getValue(), cell.getCellAddress())
        elif cell_type == self.TEXT:
            print("cell.type==TEXT", "content=", cell.getString().encode("UTF-8"),
                  cell.getCellAddress())
            total += self.__ParseCellText(spreadsheet, cell, self.__Callback(p['spreadsheet']))
            print("__ParseCellText=", p)
        elif cell_type == self.FORMULA:
            print("cell.type==FORMULA", "formula=", cell.getValue())
    p['length'] = total
    func(p)
    return total


def __ParseCellText(self, sheet, cell, func):
    """Report one text cell with its position and sheet name.

    Emits ``{'celltext', 'x', 'y', 'sheetname', 'length'}``.  When the
    address or sheet name cannot be read, ``x`` and ``y`` become -1 and
    ``sheetname`` becomes None.  Returns the length of the cell text.
    """
    try:
        x = cell.getCellAddress().Column
        y = cell.getCellAddress().Row
        sheetname = sheet.getName()
    except Exception:
        x = -1
        y = -1
        sheetname = None
    func({'celltext': cell.getString(), 'x': x, 'y': y,
          'sheetname': sheetname, 'length': len(cell.getString())})
    return len(cell.getString())
# CellContentType enum values that cell.getType() results are compared with.
self.EMPTY = uno.Enum("com.sun.star.table.CellContentType", "EMPTY")
self.TEXT = uno.Enum("com.sun.star.table.CellContentType", "TEXT")
self.FORMULA = uno.Enum("com.sun.star.table.CellContentType", "FORMULA")
self.VALUE = uno.Enum("com.sun.star.table.CellContentType", "VALUE")
10、替换excel的文本信息
def ImportFromJson(self, data):
    """Apply the cell replacements described by the JSON string ``data``.

    ``data`` must be valid JSON, otherwise ``json.loads`` raises.  Errors
    while writing (including a missing ``'doc'`` key) are ignored on purpose:
    the replacement is best effort.
    """
    doc = json.loads(data)
    try:
        self.__WriteText(doc['doc'])
    except Exception:
        pass
def __WriteText(self, text, sheet=None):
    """Write every item that has both ``'paragraph'`` and ``'sheetname'`` keys.

    The sheet is looked up by name and reused while consecutive items name
    the same sheet.  Items whose sheet cannot be found are skipped, and so
    are items lacking either key.
    """
    # NOTE(review): __ParseCellText emits 'celltext', not 'paragraph';
    # presumably the JSON is reshaped before it is imported -- confirm.
    print("__WriteTextbegin:", text)
    sheet = None
    for it in text:
        if 'paragraph' in it and 'sheetname' in it:
            if sheet is None or sheet.getName() != it['sheetname']:
                try:
                    sheet = self.sheets.getByName(it['sheetname'])
                    print("getsheet:", it['sheetname'], "=", sheet.getName())
                except Exception:
                    sheet = None
                    continue
            self.__WriteParagraph(it, sheet)


def __WriteParagraph(self, paragraph, sheet):
    """Replace the text of the cell at ``paragraph['x']``, ``paragraph['y']``.

    Nothing is written when ``'length'`` is 0, when the cell cannot be
    fetched, or when there is no non-empty ``'result'`` list.  All
    ``'trans_sen'`` entries are concatenated into the cell; setting them one
    by one would leave only the last sentence in the cell.
    """
    print("__WriteParagraph")
    if paragraph['length'] > 0:
        try:
            x = paragraph['x']
            y = paragraph['y']
            print("getcell:", x, y)
            cell = sheet.getCellByPosition(x, y)
            print("getcelldone")
        except Exception:
            return
        if 'result' in paragraph:
            sentences = [it['trans_sen'] for it in paragraph['result']]
            if sentences:
                print("cell=", cell.getString())
                cell.setString(''.join(sentences))
                print("cell,", cell.getString(), ",done")
11、生成excel文档二进制数据
# Store the spreadsheet document into an in-memory Pipe as Excel 2007 XML
# and read the resulting bytes into ``datao``.
streamo = self.smgr.createInstanceWithContext('com.sun.star.io.Pipe', self.ctx)
self.document.storeToURL(
    'private:stream',
    (
        # The filter name contains spaces; without them the export filter is
        # not found.
        PropertyValue('FilterName', 0, 'Calc MS Excel 2007 XML', 0),
        PropertyValue('OutputStream', 0, streamo, 0),
    ))
streamo.flush()
_, datao = streamo.readBytes(None, streamo.available())
12、生成excel的pdf文档
# Store the spreadsheet document into an in-memory Pipe with the Calc PDF
# export filter and read the resulting bytes into ``datap``.
streamo = self.smgr.createInstanceWithContext('com.sun.star.io.Pipe', self.ctx)
self.document.storeToURL(
    'private:stream',
    (
        PropertyValue('FilterName', 0, 'calc_pdf_Export', 0),
        PropertyValue('OutputStream', 0, streamo, 0),
    ))
streamo.flush()
_, datap = streamo.readBytes(None, streamo.available())
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。