Java通过URL获取公众号文章生成HTML的方法
说明:通过公众号URL获取的内容,文字可以正常显示,但图片存在跨域访问的问题——微信不允许跨域访问公众号图片。因此需要先将公众号图片下载到本地,再上传至OSS,最后把HTML中的图片地址全部替换为自己的OSS地址。
这里需要在后台对HTML进行DOM解析,需要用到Jsoup。
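<dependency>
    <groupId>com.aliyun.oss</groupId>
    <artifactId>aliyun-sdk-oss</artifactId>
    <version>2.2.3</version>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.9.2</version>
</dependency>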
controller
packagecom.iueang.controller; importjava.io.File; importjava.util.HashMap; importjava.util.Map; importorg.jsoup.Jsoup; importorg.jsoup.nodes.Document; importorg.jsoup.nodes.Element; importorg.jsoup.select.Elements; importorg.springframework.stereotype.Controller; importorg.springframework.web.bind.annotation.RequestMapping; importorg.springframework.web.bind.annotation.ResponseBody; importcom.iueang.util.DownLoadImg; importcom.iueang.util.GetBody; importcom.iueang.util.OssUtil2; importcom.iueang.util.UrlUtil; @Controller publicclassTestUrl{ @RequestMapping("tohtml") publicStringtohtml(){ return"html/index.html"; } @RequestMapping("getHtml") @ResponseBody publicMapgetHtml(Stringurl){ //获取url文章生成文本 Stringhtml=UrlUtil.getAccess(url); Stringreg="(.*?)"; Stringhead=GetBody.getSubUtilSimple(html,reg); StringHTTPHOST="http://yueang2.oss-cn-qingdao.aliyuncs.com/testimg/"; StringnewsBody=head; Documentdoc=Jsoup.parse(newsBody); Elementspngs=doc.select("img[data-src]"); System.out.println(pngs); for(Elementelement:pngs){ //获取图片地址 StringimgUrl=element.attr("data-src"); //下载图片到本地 Stringfilename=DownLoadImg.downloadPicture(imgUrl); Filefile=newFile("D:\\m2\\"+filename); //上传至oss Booleanflag=OssUtil2.uploadFileToOss(file,"testimg/"+filename); if(flag){ file.delete(); } Stringnewsrc=HTTPHOST+filename; element.attr("src",newsrc); } newsBody=doc.toString(); System.out.println(newsBody); Map map=newHashMap (); map.put("resultHtml",newsBody); returnmap; } }
util工具类
GetBody类
packagecom.iueang.util; importjava.util.regex.Matcher; importjava.util.regex.Pattern; publicclassGetBody{ publicstaticStringgetSubUtilSimple(Stringhtml,Stringreg){ Patternpattern=Pattern.compile(reg);//匹配的模式 Matcherm=pattern.matcher(html); while(m.find()){ returnm.group(1); } return""; } }
OssUtil2类
packagecom.iueang.util; importjava.io.File; importjava.util.HashMap; importjava.util.Map; importcom.aliyun.oss.OSSClient; importcom.aliyun.oss.model.ObjectMetadata; publicclassOssUtil2{ //以下几个参数值必填,参考文章最后文档 staticStringendpoint="http://oss-cn-qingdao.aliyuncs.com"; staticStringaccessKeyId="oss获取"; staticStringaccessKeySecert="oss获取"; staticStringbucketName="yueang2"; /** *上传单个文件到OSS *@paramfile要上传的文件File对象 *@paramobjName上传后的文件名,包含文件夹,比如game/game/test.txt *@return */ publicstaticbooleanuploadFileToOss(Filefile,StringobjName){ try{ OSSClientossClient=null; try{ ossClient=newOSSClient(endpoint,accessKeyId,accessKeySecert); }catch(Exceptione){ e.printStackTrace(); } ObjectMetadatameta=newObjectMetadata(); ossClient.putObject(bucketName,objName,file,meta); ossClient.shutdown(); }catch(Exceptione){ e.printStackTrace(); returnfalse; } returntrue; } }
DownLoadImg类
packagecom.iueang.util; importjava.io.ByteArrayOutputStream; importjava.io.DataInputStream; importjava.io.File; importjava.io.FileOutputStream; importjava.io.IOException; importjava.net.MalformedURLException; importjava.net.URL; importjava.util.UUID; importsun.misc.BASE64Encoder; publicclassDownLoadImg{ publicstaticStringdownloadPicture(StringurlList){ Stringfilename="iueang"+UUID.randomUUID().toString()+".png"; Stringpath="D:/m2/"+filename; URLurl=null; try{ url=newURL(urlList); DataInputStreamdataInputStream=newDataInputStream(url.openStream()); FileOutputStreamfileOutputStream=newFileOutputStream(newFile(path)); ByteArrayOutputStreamoutput=newByteArrayOutputStream(); byte[]buffer=newbyte[1024]; intlength; while((length=dataInputStream.read(buffer))>0){ output.write(buffer,0,length); } BASE64Encoderencoder=newBASE64Encoder(); Stringencode=encoder.encode(buffer); fileOutputStream.write(output.toByteArray()); dataInputStream.close(); fileOutputStream.close(); }catch(MalformedURLExceptione){ e.printStackTrace(); }catch(IOExceptione){ e.printStackTrace(); } System.out.println("Download返回的filname="+filename); returnfilename; } }
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。
声明:本文内容来源于网络,版权归原作者所有,内容由互联网用户自发贡献自行上传,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。如果您发现有涉嫌版权的内容,欢迎发送邮件至:czq8825#qq.com(发邮件时,请将#更换为@)进行举报,并提供相关证据,一经查实,本站将立刻删除涉嫌侵权内容。