Java通过URL获取公众号文章生成HTML的方法
说明:通过公众号URL获取的内容,文字可以正常显示,但是图片存在跨域访问的问题:微信不允许跨域访问公众号图片。所以需要先将公众号图片下载到本地,再上传至OSS,然后把HTML中的图片地址全部替换为自己的OSS地址就可以了
这里就需要在后台对HTML进行DOM解析,需要用到Jsoup
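Maven依赖:
<dependency>
    <groupId>com.aliyun.oss</groupId>
    <artifactId>aliyun-sdk-oss</artifactId>
    <version>2.2.3</version>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.9.2</version>
</dependency>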
controller
packagecom.iueang.controller;
importjava.io.File;
importjava.util.HashMap;
importjava.util.Map;
importorg.jsoup.Jsoup;
importorg.jsoup.nodes.Document;
importorg.jsoup.nodes.Element;
importorg.jsoup.select.Elements;
importorg.springframework.stereotype.Controller;
importorg.springframework.web.bind.annotation.RequestMapping;
importorg.springframework.web.bind.annotation.ResponseBody;
importcom.iueang.util.DownLoadImg;
importcom.iueang.util.GetBody;
importcom.iueang.util.OssUtil2;
importcom.iueang.util.UrlUtil;
@Controller
publicclassTestUrl{
@RequestMapping("tohtml")
publicStringtohtml(){
return"html/index.html";
}
@RequestMapping("getHtml")
@ResponseBody
publicMapgetHtml(Stringurl){
//获取url文章生成文本
Stringhtml=UrlUtil.getAccess(url);
Stringreg="(.*?)";
Stringhead=GetBody.getSubUtilSimple(html,reg);
StringHTTPHOST="http://yueang2.oss-cn-qingdao.aliyuncs.com/testimg/";
StringnewsBody=head;
Documentdoc=Jsoup.parse(newsBody);
Elementspngs=doc.select("img[data-src]");
System.out.println(pngs);
for(Elementelement:pngs){
//获取图片地址
StringimgUrl=element.attr("data-src");
//下载图片到本地
Stringfilename=DownLoadImg.downloadPicture(imgUrl);
Filefile=newFile("D:\\m2\\"+filename);
//上传至oss
Booleanflag=OssUtil2.uploadFileToOss(file,"testimg/"+filename);
if(flag){
file.delete();
}
Stringnewsrc=HTTPHOST+filename;
element.attr("src",newsrc);
}
newsBody=doc.toString();
System.out.println(newsBody);
Mapmap=newHashMap();
map.put("resultHtml",newsBody);
returnmap;
}
}
util工具类
GetBody类
packagecom.iueang.util;
importjava.util.regex.Matcher;
importjava.util.regex.Pattern;
/**
 * Regex helper for pulling a fragment out of an HTML string.
 */
public class GetBody {

    /**
     * Returns the text captured by group 1 of the first match of {@code reg} in {@code html}.
     *
     * <p>The pattern is compiled without flags, so {@code .} does not match line terminators
     * unless the pattern itself enables that (for example with {@code (?s)}).
     *
     * @param html text to search; {@code null} is treated as "no match"
     * @param reg  regular expression containing at least one capturing group; {@code null}
     *             is treated as "no match"
     * @return the content of group 1 of the first match, or {@code ""} when there is no
     *         match or group 1 did not participate in the match
     * @throws java.util.regex.PatternSyntaxException if {@code reg} is not a valid pattern
     * @throws IndexOutOfBoundsException if {@code reg} matches but has no capturing group
     */
    public static String getSubUtilSimple(String html, String reg) {
        if (html == null || reg == null) {
            return "";
        }
        Matcher matcher = Pattern.compile(reg).matcher(html);
        if (!matcher.find()) {
            return "";
        }
        // group(1) is null when the group exists but was not part of the match, e.g. "a|(b)".
        String captured = matcher.group(1);
        return captured == null ? "" : captured;
    }
}
OssUtil2类
packagecom.iueang.util;
importjava.io.File;
importjava.util.HashMap;
importjava.util.Map;
importcom.aliyun.oss.OSSClient;
importcom.aliyun.oss.model.ObjectMetadata;
publicclassOssUtil2{
//以下几个参数值必填,参考文章最后文档
staticStringendpoint="http://oss-cn-qingdao.aliyuncs.com";
staticStringaccessKeyId="oss获取";
staticStringaccessKeySecert="oss获取";
staticStringbucketName="yueang2";
/**
*上传单个文件到OSS
*@paramfile要上传的文件File对象
*@paramobjName上传后的文件名,包含文件夹,比如game/game/test.txt
*@return
*/
publicstaticbooleanuploadFileToOss(Filefile,StringobjName){
try{
OSSClientossClient=null;
try{
ossClient=newOSSClient(endpoint,accessKeyId,accessKeySecert);
}catch(Exceptione){
e.printStackTrace();
}
ObjectMetadatameta=newObjectMetadata();
ossClient.putObject(bucketName,objName,file,meta);
ossClient.shutdown();
}catch(Exceptione){
e.printStackTrace();
returnfalse;
}
returntrue;
}
}
DownLoadImg类
packagecom.iueang.util;
importjava.io.ByteArrayOutputStream;
importjava.io.DataInputStream;
importjava.io.File;
importjava.io.FileOutputStream;
importjava.io.IOException;
importjava.net.MalformedURLException;
importjava.net.URL;
importjava.util.UUID;
importsun.misc.BASE64Encoder;
/**
 * Helper that downloads a remote image into the local staging directory {@code D:/m2/}.
 */
public class DownLoadImg {

    /**
     * Downloads the resource at {@code urlList} into {@code D:/m2/} under a freshly
     * generated name of the form {@code iueang<uuid>.png}.
     *
     * <p>The generated name is returned even when the download fails; in that case the
     * error is printed and no file is left behind, so callers can detect failure by checking
     * whether the file exists. The {@code .png} suffix is applied regardless of the actual
     * image format.
     *
     * @param urlList URL of the image to download (a single URL, despite the name)
     * @return the generated file name, without the directory
     */
    public static String downloadPicture(String urlList) {
        String filename = "iueang" + UUID.randomUUID().toString() + ".png";
        File target = new File("D:/m2/" + filename);
        boolean completed = false;
        try {
            URL url = new URL(urlList);
            // Open the source first so that an unreachable URL never creates a local file;
            // try-with-resources closes both streams on every path.
            try (DataInputStream in = new DataInputStream(url.openStream());
                    FileOutputStream out = new FileOutputStream(target)) {
                byte[] buffer = new byte[1024];
                int length;
                // Copy chunk by chunk instead of buffering the whole image in memory.
                while ((length = in.read(buffer)) != -1) {
                    out.write(buffer, 0, length);
                }
            }
            completed = true;
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Remove a partially written file so it is not mistaken for a valid download.
            if (!completed && target.exists()) {
                target.delete();
            }
        }
        System.out.println("Download返回的filname=" + filename);
        return filename;
    }
}
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。
声明:本文内容来源于网络,版权归原作者所有,内容由互联网用户自发贡献自行上传,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。如果您发现有涉嫌版权的内容,欢迎发送邮件至:czq8825#qq.com(发邮件时,请将#更换为@)进行举报,并提供相关证据,一经查实,本站将立刻删除涉嫌侵权内容。