博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
HtmlUnit + fastjson 抓取酷狗音乐、QQ音乐链接及下载
阅读量:5457 次
发布时间:2019-06-15

本文共 5647 字,大约阅读时间需要 18 分钟。

上次学了 jsoup 之后,发现它无法抓取一些动态生成的网页内容,于是又学习了 HtmlUnit。下面是用 HtmlUnit 配合 fastjson 抓取酷狗音乐与QQ音乐链接的例子:

酷狗音乐:

import java.io.BufferedInputStream;import java.io.FileOutputStream;import java.io.InputStream;import java.net.URL;import java.net.URLEncoder;import java.util.UUID;import java.util.regex.Matcher;import java.util.regex.Pattern;import org.jsoup.nodes.Element;import com.alibaba.fastjson.JSONArray;import com.alibaba.fastjson.JSONObject;import com.gargoylesoftware.htmlunit.BrowserVersion;import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;import com.gargoylesoftware.htmlunit.Page;import com.gargoylesoftware.htmlunit.WebClient;public class worm7 {	 private static String name="离骚";     public static WebClient getWebClient(boolean flag){    	 WebClient webClient = new WebClient(BrowserVersion.FIREFOX_45); 		    	 webClient.getOptions().setUseInsecureSSL(true);    	 webClient.getOptions().setCssEnabled(false);              webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);         webClient.getOptions().setThrowExceptionOnScriptError(false);         webClient.getOptions().setRedirectEnabled(true);         webClient.getOptions().setAppletEnabled(false);         webClient.getOptions().setJavaScriptEnabled(flag);             webClient.getOptions().setTimeout(60000);         webClient.getOptions().setPrintContentOnFailingStatusCode(false);         webClient.setAjaxController(new NicelyResynchronizingAjaxController());          return webClient;     }     public static String getMp3Url(WebClient webClient){    	 FileOutputStream outputStream = null;         InputStream inputStream = null;         BufferedInputStream bis = null;    	try {			Page page=webClient.getPage("http://songsearch.kugou.com/song_search_v2?"					
+ "callback=jQuery112408395432201569397_1532930925600"					+ "&keyword="+URLEncoder.encode(name, "utf-8")					+ "&page=1"					+ "&pagesize=30"					+ "&userid=-1"					+ "&clientver="					+ "&platform=WebFilter"					+ "&tag=em"					+ "&filter=2"					+ "&iscorrection=1"					+ "&privilege_filter=0"					+ "&_="+System.currentTimeMillis());			//System.out.println(page.getWebResponse().getContentAsString());			//System.out.println(zzee(page.getWebResponse().getContentAsString(),"(?<=\\(\\{).*?(?=\\}\\))"));			JSONObject job=JSONObject.parseObject("{"+zzee(page.getWebResponse().getContentAsString(),"(?<=\\(\\{).*?(?=\\}\\))")+"}").getJSONObject("data");			System.out.println("job:"+job);			JSONArray list=job.getJSONArray("lists");			System.out.println("list"+list);			for(int i=0;i

  运行结果:

QQ音乐抓取实例:

import java.io.BufferedInputStream;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.net.MalformedURLException;import java.net.URL;import java.net.URLEncoder;import java.util.UUID;import java.util.regex.Matcher;import java.util.regex.Pattern;import org.jsoup.nodes.Element;import com.alibaba.fastjson.JSON;import com.alibaba.fastjson.JSONArray;import com.alibaba.fastjson.JSONObject;import com.gargoylesoftware.htmlunit.BrowserVersion;import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;import com.gargoylesoftware.htmlunit.Page;import com.gargoylesoftware.htmlunit.WebClient;public class worm6 {	 private static String name="离骚";	 static String id1=null;	 static String id2=null;	 static String id3=null;	 static String id4=null;	 static String name1=null;	 static String name2=null;	 static String url = null;	 static JSONObject  job2=null;     public static WebClient getWebClient(boolean flag){    	 WebClient webClient = new WebClient(BrowserVersion.FIREFOX_45); 		    	 webClient.getOptions().setUseInsecureSSL(true);    	 webClient.getOptions().setCssEnabled(false);              webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);         webClient.getOptions().setThrowExceptionOnScriptError(false);         webClient.getOptions().setRedirectEnabled(true);         webClient.getOptions().setAppletEnabled(false);         webClient.getOptions().setJavaScriptEnabled(flag);             webClient.getOptions().setTimeout(60000);         webClient.getOptions().setPrintContentOnFailingStatusCode(false);         webClient.setAjaxController(new NicelyResynchronizingAjaxController());          return webClient;     }     public static String getMp3Url(WebClient webClient){    	     	try {			Page page=webClient.getPage("https://c.y.qq.com/soso/fcgi-bin/client_search_cp?"					
+ "ct=24"					+ "&qqmusic_ver=1298"					+ "&new_json=1"					+ "&remoteplace=txt.yqq.center"					+ "&searchid=36047978388657978"					+ "&t=0"					+ "&aggr=1"					+ "&cr=1"					+ "&catZhida=1"					+ "&lossless=0"					+ "&p=1"					+ "&n=20"					+ "&w="+URLEncoder.encode(name, "utf-8")					+ "&g_tk=5381"					+ "&jsonpCallback=MusicJsonCallback6176591962889693"					+ "&loginUin=0"					+ "&hostUin=0"					+ "&format=jsonp"					+ "&inCharset=utf8"					+ "&outCharset=utf-8"					+ "&notice=0"					+ "&platform=yqq"					+ "&needNewCode=0"					);			//System.out.println("page:"+page);			//System.out.println("------"+page.getWebResponse().getContentAsString());			//System.out.println("======"+zzee(page.getWebResponse().getContentAsString(),"(?<=\\(\\{).*?(?=\\}\\))"));						JSONObject job=JSONObject.parseObject("{"+zzee(page.getWebResponse().getContentAsString(),"(?<=\\(\\{).*?(?=\\}\\))")+"}").getJSONObject("data");			//System.out.println("job:"+job);			String job0=job.getString("song");			//System.out.println("job0"+job0);			job=JSON.parseObject(job0);			JSONArray list=job.getJSONArray("list");			//System.out.println("list:"+list);			for(int i=0;i

  

运行结果:

 

 

相比之下,酷狗音乐相对好爬一些,QQ音乐则比较繁琐。

转载于:https://www.cnblogs.com/xr210/p/9404325.html

你可能感兴趣的文章
机器学习评价方法 - Recall & Precision
查看>>
dedecms网站迁移时记得将安装目录放空 附迁移的正确方法
查看>>
E. Intersection of Permutations
查看>>
将数据集做成VOC2007格式用于Faster-RCNN训练
查看>>
[数学趣味001]RSA算法原理及示例
查看>>
自定义Scons中Builder
查看>>
Petshop学习第三天
查看>>
zookeeper笔记
查看>>
JavaMail发送邮件
查看>>
centos6.5和centos7如何搭建php环境
查看>>
js 金额分隔符的方法
查看>>
Selenium定位元素-Xpath的使用方法
查看>>
oracle 模糊查询中的转义字符用法
查看>>
java克隆对象clone()的使用方法和作用
查看>>
unity 切圆角矩形 --shader编程
查看>>
大话设计模式C++版——装饰模式
查看>>
java和C#异常处理的差异
查看>>
pre标签
查看>>
缓存好文章
查看>>
auto function -> return type 当不能从{}内推断类型时
查看>>