LibreOffice Python 操作 Word 及 Excel 文档的方法
1、开始、关闭libreoffice服务;
开始之前同步字体文件时间,是因为创建soffice服务时,服务会检查所需加载的文件的时间,如果其认为时间不符,则其可能会重新加载,耗时较长,因此需事先统一时间。
使用时如果需要多次调用,最好每次调用都先开启服务、用完后立即关闭,否则libreoffice会创建一个缓存文档并越用越大,处理时间也会随之增加。
class OfficeProcess(object):
    """Start and stop a background ``soffice`` process that accepts UNO connections.

    The server listens on ``localhost:2002`` and writes its process id to
    ``sof.pid`` in the current working directory.
    """

    def __init__(self):
        """Normalise the modification time of the system font files.

        ``self.p`` holds the ``Popen`` handle of the soffice process once
        :meth:`start_office` has been called; until then it is ``0``.
        """
        self.p = 0
        # Give every font file the same mtime up front so soffice does not
        # decide to re-scan the fonts (slow) when it starts.
        # The command runs in the background and is not waited for.
        subprocess.Popen(
            'find /usr/share/fonts | xargs touch -m -t 201801010000.00',
            shell=True)

    def start_office(self):
        """Launch soffice and block until its UNO socket accepts a connection."""
        self.p = subprocess.Popen(
            'soffice --pidfile=sof.pid --invisible '
            '--accept="socket,host=localhost,port=2002;urp;"',
            shell=True)
        while True:
            try:
                local_context = uno.getComponentContext()
                resolver = local_context.getServiceManager().createInstanceWithContext(
                    'com.sun.star.bridge.UnoUrlResolver', local_context)
                resolver.resolve(
                    'uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext')
                return
            except Exception:
                # Server not reachable yet: report, wait a second and retry.
                # ``Exception`` (not a bare except) keeps Ctrl-C able to
                # break out of this otherwise endless loop.
                print(ts(), "waitforconnectingsoffice...")
                time.sleep(1)

    def stop_office(self):
        """Send SIGTERM to the pid recorded in ``sof.pid`` and wait for the child.

        Raises whatever ``open`` raises when ``sof.pid`` cannot be opened;
        errors while killing or waiting are ignored on purpose.
        """
        with open("sof.pid", "rb") as f:
            try:
                os.kill(int(f.read()), signal.SIGTERM)
                self.p.wait()
            except Exception:
                # Best effort: the process may already be gone, the pid file
                # may be malformed, or start_office() was never called.
                pass
2、初始化 service manager(init service manager)
# Connect to the soffice instance listening on localhost:2002 and keep the
# remote component context, its service manager and a Desktop on ``self``.
# NOTE(review): this fragment uses ``self``, so it is presumably the body of
# an initialiser of the converter class - confirm against the full file.
local_context = uno.getComponentContext()
service_manager = local_context.getServiceManager()
resolver = service_manager.createInstanceWithContext(
    'com.sun.star.bridge.UnoUrlResolver',
    local_context,
)
self.ctx = resolver.resolve(
    'uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext'
)
self.smgr = self.ctx.ServiceManager
self.desktop = self.smgr.createInstanceWithContext(
    'com.sun.star.frame.Desktop',
    self.ctx,
)
3、从二进制数据中读取doc文档
def ImportFromMemory(self, data):
    """Open a Writer document from an in-memory byte string.

    Resets ``self.doc`` to ``{'doc': []}``. On success ``self.document`` is
    the loaded component and ``self.text`` its text object; if loading fails
    ``self.text`` is set to ``None``.

    :param data: raw bytes of the document file.
    """
    # Wrap the bytes in a UNO input stream the loader can read from.
    istream = self.smgr.createInstanceWithContext(
        'com.sun.star.io.SequenceInputStream', self.ctx)
    istream.initialize((uno.ByteSequence(data),))
    pv = PropertyValue()
    pv.Name = 'InputStream'
    pv.Value = istream
    self.doc = {'doc': []}
    try:
        self.document = self.desktop.loadComponentFromURL(
            'private:stream/swriter', '_blank', 0, (pv,))
        self.text = self.document.getText()
    except Exception:
        # Loading failed (or returned nothing usable): mark as "no text".
        self.text = None
4、读取doc文档中的数据
def ExportToJson(self):
    """Serialise the loaded document to a JSON string.

    Parses ``self.text`` into ``self.doc['doc']`` and stores the value
    returned by the parser under ``self.doc['length']``. If parsing raises,
    the result is an empty document ``{'doc': [], 'length': 0}``.

    :return: ``json.dumps`` of ``self.doc``.
    """
    try:
        total = self.__ParseText(self.text, self.__Callback(self.doc['doc']))
        self.doc['length'] = total
    except Exception:
        # Any parse failure (including self.text being None) yields an
        # empty document instead of propagating.
        self.doc = {'doc': [], 'length': 0}
    return json.dumps(self.doc)
@staticmethod
def __Callback(alist):
    """Return a one-argument function that appends its argument to *alist*.

    The parser methods use it as the "sink" into which they emit results.
    """
    def Append(sth):
        alist.append(sth)
    return Append
def __ParseText(self, text, func):
    """Walk the top-level elements of *text* and emit paragraphs and tables.

    :param text: object providing ``createEnumeration()``.
    :param func: callback receiving each parsed element.
    :return: sum of the lengths reported by the element parsers.
    """
    total = 0
    text_it = text.createEnumeration()
    while text_it.hasMoreElements():
        element = text_it.nextElement()
        if element.supportsService('com.sun.star.text.Paragraph'):
            total += self.__ParseParagraph(element, func)
        elif element.supportsService('com.sun.star.text.TextTable'):
            total += self.__ParseTable(element, func)
        # Any other element kind is ignored.
    return total
def __ParseParagraph(self, paragraph, func):
    """Parse one paragraph and pass ``{'paragraph': [...], 'length': n}`` to *func*.

    'Text' and 'TextField' portions are emitted as text; 'SoftPageBreak'
    portions are skipped; every other portion type is searched for embedded
    text content (graphics, frames, groups).

    :return: the accumulated length of the paragraph's parts.
    """
    p = {'paragraph': []}
    collect = self.__Callback(p['paragraph'])
    length = 0
    paragraph_it = paragraph.createEnumeration()
    while paragraph_it.hasMoreElements():
        portion = paragraph_it.nextElement()
        portion_type = portion.TextPortionType
        if portion_type in ('Text', 'TextField'):
            length += self.__ParsePortionText(portion, collect)
        elif portion_type == 'SoftPageBreak':
            pass
        else:
            length += self.__ParseTextContent(portion, collect)
    # NOTE(review): the source lost its indentation; this check is assumed to
    # run once per paragraph (after the portion loop) - confirm.
    if hasattr(paragraph, 'createContentEnumeration'):
        length += self.__ParseTextContent(paragraph, collect)
    p['length'] = length
    func(p)
    return length
def __ParseTextContent(self, textcontent, func):
    """Parse the text contents attached to *textcontent*.

    Graphics, text frames and group shapes are handed to their parsers;
    embedded objects and anything else are skipped.

    :return: sum of the lengths returned by the invoked parsers.
    """
    total = 0
    content_it = textcontent.createContentEnumeration('com.sun.star.text.TextContent')
    while content_it.hasMoreElements():
        element = content_it.nextElement()
        if element.supportsService('com.sun.star.text.TextGraphicObject'):
            total += self.__ParsePortionGraphic(element, func)
        elif element.supportsService('com.sun.star.text.TextEmbeddedObject'):
            # Checked before TextFrame so embedded objects are skipped rather
            # than falling through to a later branch.
            pass
        elif element.supportsService('com.sun.star.text.TextFrame'):
            total += self.__ParseFrame(element, func)
        elif element.supportsService('com.sun.star.drawing.GroupShape'):
            total += self.__ParseGroup(element, func)
    return total
def __ParseFrame(self, frame, func):
    """Parse the text inside *frame* and pass ``{'frame': [...], 'length': n}`` to *func*.

    :return: the length reported for the frame's text.
    """
    f = {'frame': []}
    length = self.__ParseText(frame.getText(), self.__Callback(f['frame']))
    f['length'] = length
    func(f)
    return length
def __ParseGroup(self, group, func):
    """Parse every member of a group shape that supports the drawing Text service.

    Such members are parsed like frames; other members are ignored.

    :return: sum of the lengths of the parsed members.
    """
    total = 0
    for i in range(group.getCount()):
        it = group.getByIndex(i)
        if it.supportsService('com.sun.star.drawing.Text'):
            total += self.__ParseFrame(it, func)
    return total
def __ParsePortionText(self, portion_text, func):
    """Emit ``{'portion': <string>, 'length': <len>}`` for a text portion.

    :return: the number of characters in the portion's string.
    """
    # Read the property once instead of three times.
    content = portion_text.String
    func({'portion': content, 'length': len(content)})
    return len(content)
def __ParsePortionGraphic(self, portion_graphic, func):
    """Export an embedded graphic as base64 PNG plus size and crop metadata.

    The graphic is stored as ``image/png`` into a UNO TempFile, read back,
    and passed to *func* as a dict with the keys ``image``, ``height``,
    ``width``, ``actualheight``, ``actualwidth``, ``croptop``,
    ``cropbottom``, ``cropleft``, ``cropright`` and ``length`` (always 0).

    :return: always 0 - images do not contribute to the text length.
    """
    gp = self.smgr.createInstanceWithContext('com.sun.star.graphic.GraphicProvider', self.ctx)
    stream = self.smgr.createInstanceWithContext('com.sun.star.io.TempFile', self.ctx)
    pv1 = PropertyValue()
    pv1.Name = 'OutputStream'
    pv1.Value = stream
    pv2 = PropertyValue()
    pv2.Name = 'MimeType'
    pv2.Value = 'image/png'
    gp.storeGraphic(portion_graphic.Graphic, (pv1, pv2))
    stream.getOutputStream().flush()

    # Rewind and read back everything that was written.
    stream.seek(0)
    size = stream.getInputStream().available()
    buf = uno.ByteSequence(b'')
    stream.seek(0)
    # readBytes returns (bytes_read, data); only the data is used.
    _, buf = stream.getInputStream().readBytes(buf, size)

    img = {'image': base64.b64encode(buf.value).decode('ascii')}
    img['height'] = portion_graphic.Height
    img['width'] = portion_graphic.Width
    img['actualheight'] = portion_graphic.ActualSize.Height
    img['actualwidth'] = portion_graphic.ActualSize.Width
    img['croptop'] = portion_graphic.GraphicCrop.Top
    img['cropbottom'] = portion_graphic.GraphicCrop.Bottom
    img['cropleft'] = portion_graphic.GraphicCrop.Left
    img['cropright'] = portion_graphic.GraphicCrop.Right
    img['length'] = 0
    func(img)
    return 0
def __ParseTable(self, table, func):
    """Parse a text table and emit it through *func*.

    Emits ``{'table': {row: {col: cell_info}}, 'row': n_rows,
    'column': n_columns, 'length': n, 'tableid': id}`` where each
    ``cell_info`` carries ``rowspan``, ``colspan``, ``name`` and the parsed
    ``content`` list. ``self.table_id`` is incremented after each emitted
    table.

    A table is discarded (nothing emitted, 0 returned) when anything raises,
    including when the spans of all cells do not add up to rows x columns.

    :return: accumulated length of all cell contents, or 0 if discarded.
    """
    total = 0
    try:
        matrix = self.__GetTableMatrix(table)
        seps = self.__GetTableSeparators(table)
        t = {}
        count = 0
        for ri in matrix.keys():
            t[ri] = {}
            for ci in matrix[ri].keys():
                # Copy the cell record without the (non-serialisable) UNO cell.
                t[ri][ci] = dict(matrix[ri][ci])
                del t[ri][ci]['cell']
                t[ri][ci]['content'] = []
                total += self.__ParseText(
                    matrix[ri][ci]['cell'], self.__Callback(t[ri][ci]['content']))
                count += t[ri][ci]['rowspan'] * t[ri][ci]['colspan']
        # Sanity check: the cells' spans must tile the whole grid exactly.
        if count != len(t) * len(seps):
            raise ValueError('countofcellserror')
        func({'table': t, 'row': len(t), 'column': len(seps),
              'length': total, 'tableid': self.table_id})
        self.table_id += 1
    except Exception:
        total = 0
        print('discardwrongtable')
    return total
@staticmethod
def __GetTableSeparators(table):
    """Return the sorted, de-duplicated column boundary positions of *table*.

    Collects the separator positions of every row plus the table's
    ``TableColumnRelativeSum``. A position that is exactly one less than the
    next position is bumped onto it, so boundaries that differ by 1 collapse
    into a single boundary.

    :return: ascending list of distinct positions.
    """
    rows = table.getRows()
    result = [table.TableColumnRelativeSum]
    for ri in range(rows.getCount()):
        result += [s.Position for s in rows.getByIndex(ri).TableColumnSeparators]
    result = sorted(set(result))
    for i in range(len(result) - 1):
        # Merge near-duplicates: x and x+1 are treated as the same boundary.
        if result[i] + 1 == result[i + 1]:
            result[i] += 1
    return sorted(set(result))
@staticmethod
def __NameToRC(name):
    """Convert a table cell name such as ``'B3'`` into zero-based ``(row, column)``.

    Column letters use 52 symbols: ``A``-``Z`` are 0-25 and ``a``-``z`` are
    26-51. Multi-letter names continue after the single letters
    (``'AA'`` is column 52), i.e. the letters form a bijective base-52
    numeral. Decoding them as plain base-52 would map ``'AA'`` onto the same
    column as ``'A'``.

    :param name: cell name made of letters followed by a row number.
    :return: ``(row, column)``, both zero-based.
    :raises ValueError: if the non-letter part is not an integer.
    """
    row = int(re.sub(r'[A-Za-z]', '', name)) - 1
    letters = re.sub(r'[0-9]', '', name)
    col = 0
    for ch in letters:
        if 'A' <= ch <= 'Z':
            digit = ord(ch) - ord('A')
        else:
            digit = 26 + ord(ch) - ord('a')
        # +1 per letter makes the numeral bijective ("A" = 1, ..., "z" = 52).
        col = col * 52 + digit + 1
    # Back to zero-based; a name without letters keeps column 0.
    return row, max(col - 1, 0)
@staticmethod
def __GetTableMatrix(table):
    """Build ``{row: {col: {'cell', 'rowspan', 'name', 'colspan'}}}`` for *table*.

    ``rowspan`` comes from the cell's ``RowSpan`` property. ``colspan`` is
    derived by locating the cell's left and right boundaries (from its own
    row's separators) in the table-wide separator list and taking the
    difference of their indices.

    :raises ValueError: if a boundary cannot be found in the table-wide list.
    """
    result = {}
    for name in table.getCellNames():
        ri, ci = WordToJson.__NameToRC(name)
        cell = table.getCellByName(name)
        if ri not in result:
            result[ri] = {}
        result[ri][ci] = {'cell': cell, 'rowspan': cell.RowSpan, 'name': name}

    seps = WordToJson.__GetTableSeparators(table)

    def boundary_index(position):
        # The table-wide list may have bumped a position by one when merging
        # near-duplicates, so fall back to position + 1.
        return seps.index(position) if position in seps else seps.index(position + 1)

    rows = table.getRows()
    for ri in result.keys():
        sep = [s.Position for s in rows.getByIndex(ri).TableColumnSeparators]
        sep = sorted(set(sep + [table.TableColumnRelativeSum]))
        for ci in result[ri].keys():
            right = boundary_index(sep[ci])
            # The first cell of a row starts at the table's left edge (-1).
            left = -1 if ci == 0 else boundary_index(sep[ci - 1])
            result[ri][ci]['colspan'] = right - left
    return result
5、写doc文档
# Create a new, empty Writer document for output and a text cursor used to
# append content to it.
self.doco = self.desktop.loadComponentFromURL(
    'private:factory/swriter',
    '_blank',
    0,
    (),
)
self.texto = self.doco.getText()
self.cursoro = self.texto.createTextCursor()
# Paragraph bottom margin set through the cursor.
# NOTE(review): unit presumably 1/100 mm - confirm.
self.cursoro.ParaBottomMargin = 500
def __WriteText(self, text, texto, cursoro):
    """Write a list of parsed items into *texto* at *cursoro*.

    Each item is dispatched by the key it contains: ``'paragraph'``,
    ``'image'`` or ``'table'``. Items with none of these keys are skipped.
    """
    for it in text:
        if 'paragraph' in it:
            self.__WriteParagraph(it, texto, cursoro)
        elif 'image' in it:
            self.__WritePortionGraphic(it, texto, cursoro)
        elif 'table' in it:
            self.__WriteTable(it, texto, cursoro)
def __WriteParagraph(self, paragraph, texto, cursoro):
    """Insert a non-empty paragraph followed by a paragraph break.

    When the item has a ``'result'`` list, the ``'trans_sen'`` string of
    every entry is inserted in order; otherwise ``paragraph['paragraph']``
    is inserted as-is. Paragraphs whose ``'length'`` is not positive are
    skipped.
    """
    # NOTE(review): the source lost its indentation. The ``else`` is assumed
    # to pair with the 'result' check and the paragraph break to be emitted
    # only for non-empty paragraphs - confirm against the original file.
    if paragraph['length'] > 0:
        if 'result' in paragraph:
            for it in paragraph['result']:
                texto.insertString(cursoro, it['trans_sen'], False)
        else:
            texto.insertString(cursoro, paragraph['paragraph'], False)
        texto.insertControlCharacter(cursoro, ControlCharacter.PARAGRAPH_BREAK, False)
def __WritePortionGraphic(self, portion_graphic, texto, cursoro):
    """Insert a base64-encoded image into *texto*, followed by a paragraph break.

    :param portion_graphic: dict with ``'image'`` (base64), ``'height'`` and
        ``'width'``; optional ``'actualheight'`` / ``'actualwidth'`` (default:
        height / width) and ``'croptop'`` / ``'cropbottom'`` / ``'cropleft'``
        / ``'cropright'`` (default: 0).
    :raises KeyError: if ``'image'``, ``'height'`` or ``'width'`` is missing.
    """
    png = base64.b64decode(portion_graphic['image'])
    height = portion_graphic['height']
    width = portion_graphic['width']

    # Feed the decoded bytes to the graphic provider through a UNO stream.
    gp = self.smgr.createInstanceWithContext('com.sun.star.graphic.GraphicProvider', self.ctx)
    istream = self.smgr.createInstanceWithContext('com.sun.star.io.SequenceInputStream', self.ctx)
    istream.initialize((uno.ByteSequence(png),))
    pv = PropertyValue()
    pv.Name = 'InputStream'
    pv.Value = istream

    actualsize = uno.createUnoStruct('com.sun.star.awt.Size')
    actualsize.Height = portion_graphic.get('actualheight', height)
    actualsize.Width = portion_graphic.get('actualwidth', width)

    graphiccrop = uno.createUnoStruct('com.sun.star.text.GraphicCrop')
    graphiccrop.Top = portion_graphic.get('croptop', 0)
    graphiccrop.Bottom = portion_graphic.get('cropbottom', 0)
    graphiccrop.Left = portion_graphic.get('cropleft', 0)
    graphiccrop.Right = portion_graphic.get('cropright', 0)

    image = self.doco.createInstance('com.sun.star.text.TextGraphicObject')
    # NOTE(review): NONE is presumably com.sun.star.text.WrapTextMode.NONE
    # imported at file level - confirm.
    image.Surround = NONE
    image.Graphic = gp.queryGraphic((pv,))
    image.Height = height
    image.Width = width
    image.setPropertyValue('ActualSize', actualsize)
    image.setPropertyValue('GraphicCrop', graphiccrop)
    texto.insertTextContent(cursoro, image, False)
    texto.insertControlCharacter(cursoro, ControlCharacter.PARAGRAPH_BREAK, False)
def __WriteTable(self, table, texto, cursoro):
    """Create a text table from a parsed table item and fill its cells.

    :param table: dict with ``'row'``, ``'column'`` and ``'table'``; the
        latter maps row keys to dicts of column keys to cell dicts holding
        ``'name'``, ``'rowspan'``, ``'colspan'`` and ``'content'``. Row and
        column keys are strings of integers (as after a JSON round trip).
    """
    tableo = self.doco.createInstance('com.sun.star.text.TextTable')
    tableo.initialize(table['row'], table['column'])
    texto.insertTextContent(cursoro, tableo, False)

    # Probe whether the table cursor can extend a selection downwards: if the
    # range is still 'A1' after goDown with expand, merging is skipped below.
    tcursoro = tableo.createCursorByCellName("A1")
    hitbug = False
    if table['row'] > 1:
        tcursoro.goDown(1, True)
        hitbug = tcursoro.getRangeName() == 'A1'

    for ri in sorted(int(r) for r in table['table'].keys()):
        rs = table['table'][str(ri)]
        for ci in sorted(int(c) for c in rs.keys()):
            cell = rs[str(ci)]
            if not hitbug and (cell['rowspan'] > 1 or cell['colspan'] > 1):
                # Select the spanned range starting at this cell and merge it.
                tcursoro.gotoCellByName(cell['name'], False)
                if cell['rowspan'] > 1:
                    tcursoro.goDown(cell['rowspan'] - 1, True)
                if cell['colspan'] > 1:
                    tcursoro.goRight(cell['colspan'] - 1, True)
                tcursoro.mergeRange()
            ctexto = tableo.getCellByName(cell['name'])
            if ctexto is None:
                # No cell with that name in the new table: skip it.
                continue
            ccursoro = ctexto.createTextCursor()
            ccursoro.CharWeight = FontWeight.NORMAL
            ccursoro.CharWeightAsian = FontWeight.NORMAL
            # NOTE(review): LEFT is presumably
            # com.sun.star.style.ParagraphAdjust.LEFT - confirm.
            ccursoro.ParaAdjust = LEFT
            self.__WriteText(cell['content'], ctexto, ccursoro)
6、生成二进制的doc文档数据
# Store the output document into an in-memory pipe using the Word 2007 XML
# (.docx) export filter; the resulting bytes end up in ``datao``.
streamo = self.smgr.createInstanceWithContext('com.sun.star.io.Pipe', self.ctx)
self.doco.storeToURL('private:stream', (
    # The filter name contains spaces; without them the filter is not found.
    PropertyValue('FilterName', 0, 'MS Word 2007 XML', 0),
    PropertyValue('OutputStream', 0, streamo, 0),
))
streamo.flush()
# readBytes returns (count, data); ``None`` stands in for the out-parameter.
_, datao = streamo.readBytes(None, streamo.available())
7、从doc文档数据生成pdf的二进制数据
# Export the output document as PDF into an in-memory pipe; the resulting
# bytes end up in ``datap``.
streamo = self.smgr.createInstanceWithContext('com.sun.star.io.Pipe', self.ctx)
pdf_args = (
    PropertyValue('FilterName', 0, 'writer_pdf_Export', 0),
    PropertyValue('OutputStream', 0, streamo, 0),
)
self.doco.storeToURL('private:stream', pdf_args)
streamo.flush()
# readBytes returns (count, data); only the data is kept.
_, datap = streamo.readBytes(None, streamo.available())
8、读取excel二进制数据
def ImportFromMemory(self, data):
    """Open a spreadsheet document from an in-memory byte string.

    Resets ``self.doc`` to ``{'doc': []}``. On success ``self.document`` is
    the loaded component and ``self.sheets`` its sheet collection; if loading
    fails ``self.sheets`` is set to ``None``. Progress is printed.

    :param data: raw bytes of the spreadsheet file.
    """
    # Wrap the bytes in a UNO input stream the loader can read from.
    istream = self.smgr.createInstanceWithContext(
        'com.sun.star.io.SequenceInputStream', self.ctx)
    istream.initialize((uno.ByteSequence(data),))
    pv = PropertyValue()
    pv.Name = 'InputStream'
    pv.Value = istream
    self.doc = {'doc': []}
    try:
        print("beforeloadComponentFromURL")
        self.document = self.desktop.loadComponentFromURL(
            'private:stream/scalc', '_blank', 0, (pv,))
        self.sheets = self.document.getSheets()
        print("ImportFromMemorydone")
    except Exception:
        print("ImportFromMemoryfailed")
        self.sheets = None
9、读取excel的文本数据
def ExportToJson(self):
    """Serialise the loaded spreadsheet to a JSON string.

    Parses ``self.sheets`` into ``self.doc['doc']`` and stores the value
    returned by the parser under ``self.doc['length']``. If parsing raises,
    the result is an empty document ``{'doc': [], 'length': 0}``.

    :return: ``json.dumps`` of ``self.doc``.
    """
    try:
        total = self.__ParseText(self.sheets, self.__Callback(self.doc['doc']))
        self.doc['length'] = total
    except Exception:
        # Any parse failure (including self.sheets being None) yields an
        # empty document instead of propagating.
        self.doc = {'doc': [], 'length': 0}
    return json.dumps(self.doc)
def __ParseText(self, sheets, func):
    """Parse every spreadsheet in *sheets*.

    Elements that do not support the Spreadsheet service are skipped.

    :param sheets: object providing ``createEnumeration()``.
    :param func: callback receiving each parsed sheet.
    :return: sum of the lengths reported for the sheets.
    """
    total = 0
    sheets_it = sheets.createEnumeration()
    while sheets_it.hasMoreElements():
        element = sheets_it.nextElement()
        if element.supportsService('com.sun.star.sheet.Spreadsheet'):
            total += self.__ParseSpreadsheet(element, func)
    return total
def __ParseSpreadsheet(self, spreadsheet, func):
    """Collect the text cells among the visible cells of one sheet.

    Passes ``{'spreadsheet': [...], 'length': n}`` to *func*. Only cells of
    type TEXT are collected; EMPTY, VALUE and FORMULA cells are just logged.

    :return: total number of characters in the collected text cells.
    """
    total = 0
    p = {'spreadsheet': []}
    visible_cells_it = spreadsheet.queryVisibleCells().getCells().createEnumeration()
    while visible_cells_it.hasMoreElements():
        cell = visible_cells_it.nextElement()
        # Named cell_type so the builtin ``type`` is not shadowed.
        cell_type = cell.getType()
        if cell_type == self.EMPTY:
            print("cell.type==empty")
        elif cell_type == self.VALUE:
            print("cell.type==VALUE", "value=", cell.getValue(), cell.getCellAddress())
        elif cell_type == self.TEXT:
            print("cell.type==TEXT", "content=", cell.getString().encode("UTF-8"), cell.getCellAddress())
            total += self.__ParseCellText(spreadsheet, cell, self.__Callback(p['spreadsheet']))
            print("__ParseCellText=", p)
        elif cell_type == self.FORMULA:
            print("cell.type==FORMULA", "formula=", cell.getValue())
    p['length'] = total
    func(p)
    return total
def __ParseCellText(self, sheet, cell, func):
    """Emit the text of one cell together with its position and sheet name.

    Passes ``{'celltext', 'x', 'y', 'sheetname', 'length'}`` to *func*, where
    ``x`` is the column and ``y`` the row of the cell address. If the address
    or the sheet name cannot be read, ``x`` and ``y`` become -1 and
    ``sheetname`` becomes ``None``.

    :return: the number of characters in the cell's string.
    """
    try:
        address = cell.getCellAddress()
        x = address.Column
        y = address.Row
        sheetname = sheet.getName()
    except Exception:
        # Position is all-or-nothing: any failure invalidates all three.
        x = -1
        y = -1
        sheetname = None
    content = cell.getString()
    func({'celltext': content, 'x': x, 'y': y, 'sheetname': sheetname, 'length': len(content)})
    return len(content)
# Cache the CellContentType enum members on ``self`` (self.EMPTY, self.TEXT,
# self.FORMULA, self.VALUE) so cell types can be compared against them.
for _member in ("EMPTY", "TEXT", "FORMULA", "VALUE"):
    setattr(self, _member, uno.Enum("com.sun.star.table.CellContentType", _member))
10、替换excel的文本信息
def ImportFromJson(self, data):
    """Apply the contents of a JSON document to the loaded spreadsheet.

    :param data: JSON text of an object with a ``'doc'`` list.
    :raises ValueError: if *data* is not valid JSON (``json.loads`` runs
        outside the guarded section).
    """
    doc = json.loads(data)
    try:
        self.__WriteText(doc['doc'])
    except Exception:
        # Best effort: a missing 'doc' key or a failure while writing leaves
        # the spreadsheet as far as it got, without raising.
        pass
def __WriteText(self, text):
    """Write every item of *text* that names a sheet into that sheet.

    Only items containing both ``'paragraph'`` and ``'sheetname'`` are
    processed. The sheet object is looked up by name and reused while
    consecutive items refer to the same sheet; items whose sheet cannot be
    found are skipped.
    """
    print("__WriteTextbegin:", text)
    sheet = None
    for it in text:
        if 'paragraph' in it and 'sheetname' in it:
            if sheet is None or sheet.getName() != it['sheetname']:
                try:
                    sheet = self.sheets.getByName(it['sheetname'])
                    print("getsheet:", it['sheetname'], "=", sheet.getName())
                except Exception:
                    # Unknown sheet: forget the cached one and skip the item.
                    sheet = None
                    continue
            self.__WriteParagraph(it, sheet)
def __WriteParagraph(self, paragraph, sheet):
    """Replace the text of the cell at (``paragraph['x']``, ``paragraph['y']``).

    When the item has a non-empty ``'result'`` list, the ``'trans_sen'``
    strings of all entries are concatenated in order and written to the cell
    in a single ``setString`` call. Calling ``setString`` once per entry
    would overwrite the cell each time and keep only the last sentence.
    Items whose ``'length'`` is not positive, or whose cell cannot be
    obtained, are skipped.
    """
    print("__WriteParagraph")
    if paragraph['length'] > 0:
        try:
            x = paragraph['x']
            y = paragraph['y']
            print("getcell:", x, y)
            cell = sheet.getCellByPosition(x, y)
            print("getcelldone")
        except Exception:
            # Missing coordinates or no such cell: nothing to write.
            return
        if 'result' in paragraph and paragraph['result']:
            print("cell=", cell.getString())
            cell.setString(''.join(it['trans_sen'] for it in paragraph['result']))
            print("cell,", cell.getString(), ",done")
11、生成excel文档二进制数据
# Store the spreadsheet into an in-memory pipe using the Excel 2007 XML
# (.xlsx) export filter; the resulting bytes end up in ``datao``.
streamo = self.smgr.createInstanceWithContext('com.sun.star.io.Pipe', self.ctx)
self.document.storeToURL('private:stream', (
    # The filter name contains spaces; without them the filter is not found.
    PropertyValue('FilterName', 0, 'Calc MS Excel 2007 XML', 0),
    PropertyValue('OutputStream', 0, streamo, 0),
))
streamo.flush()
# readBytes returns (count, data); ``None`` stands in for the out-parameter.
_, datao = streamo.readBytes(None, streamo.available())
12、生成excel的pdf文档
# Export the spreadsheet as PDF into an in-memory pipe; the resulting bytes
# end up in ``datap``.
streamo = self.smgr.createInstanceWithContext('com.sun.star.io.Pipe', self.ctx)
pdf_args = (
    PropertyValue('FilterName', 0, 'calc_pdf_Export', 0),
    PropertyValue('OutputStream', 0, streamo, 0),
)
self.document.storeToURL('private:stream', pdf_args)
streamo.flush()
# readBytes returns (count, data); only the data is kept.
_, datap = streamo.readBytes(None, streamo.available())
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。