您好,登錄后才能下訂單哦!
這篇文章主要講解了“Java爬蟲之如何實現B站粉絲取關人排查”,文中的講解內容簡單清晰,易于學習與理解,下面請大家跟著小編的思路慢慢深入,一起來研究和學習“Java爬蟲之如何實現B站粉絲取關人排查”吧!
開發工具:Eclipse/IDEA
瀏覽器:Google Chrome
瀏覽器Selenium驅動:Selenium 3.5
Jar包:
// Selenium驅動版本需要和Chrome瀏覽器版本對應
獲取Cookie(終端輸入或者使用Selenium打開掃碼登錄)
請求https://api.bilibili.com/x/relation/followers接口
解析數據
存入csv
package com.mm.rep;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Scanner;
import java.util.Set;

import org.openqa.selenium.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.csvreader.CsvWriter;
import org.apache.commons.lang3.StringUtils;

/**
 * Exports the follower list of a Bilibili user to a CSV file.
 *
 * <p>Flow: obtain a login cookie (typed on stdin, or harvested from a Chrome
 * session after a QR-code login), page through the
 * {@code /x/relation/followers} endpoint, and write one CSV row per follower.
 *
 * <p>Not thread-safe: the browser handle is kept in a static field and the
 * HTTP client is shared without synchronization.
 */
public class Main {

    private static final Logger logger = LogManager.getLogger(Main.class);

    private static final Charset UTF8 = StandardCharsets.UTF_8;

    private final static String BLOGINURL = "https://passport.bilibili.com/login";
    private final static String BMAINPAGE = "https://www.bilibili.com/";
    private static final String FOLLOWERS_API = "https://api.bilibili.com/x/relation/followers";

    /** Used only when the {@code webdriver.chrome.driver} system property is not already set. */
    private static final String DEFAULT_CHROMEDRIVER = "H:/chromedriver/chromedriver.exe";
    /** Used when no user id is passed as the first program argument. */
    private static final String DEFAULT_VMID = "309103931";
    /** Used when no output path is passed as the second program argument. */
    private static final String DEFAULT_CSV_PATH = "C:\\Users\\computer\\Desktop\\aaa.csv";

    private static final int PAGE_SIZE = 20;
    /** Delay between login-state polls and between page requests, in milliseconds. */
    private static final long PAUSE_MILLIS = 100L;

    /** One client for all page requests; each request releases its connection when done. */
    private static final HttpClient CLIENT = new HttpClient();

    /** Browser used for QR-code login; stays {@code null} when the cookie is typed in. */
    private static WebDriver driver = null;

    /** Configures log4j with its basic console setup. */
    Main() {
        BasicConfigurator.configure();
    }

    /**
     * Obtains the cookie string to send to the Bilibili API.
     *
     * <p>The user is first asked to type a cookie. If the line is blank (or
     * stdin has no line), a Chrome window is opened on the login page and the
     * method polls until the browser lands on the Bilibili home page, then
     * collects the session cookies.
     *
     * @return a {@code Cookie} header value of the form {@code name=value; name=value}
     * @throws InterruptedException if interrupted while waiting for the login to finish
     */
    public static String getCookie() throws InterruptedException {
        // Intentionally not closed: closing the Scanner would close System.in.
        Scanner ip = new Scanner(System.in);
        logger.info("請輸入Cookie,如果沒有請按回車:");
        String scCookie = ip.hasNextLine() ? ip.nextLine().trim() : "";
        if (!scCookie.isEmpty()) {
            return scCookie;
        }

        logger.info("開始掃碼登錄");
        // Respect a driver path supplied with -Dwebdriver.chrome.driver=...
        if (System.getProperty("webdriver.chrome.driver") == null) {
            System.setProperty("webdriver.chrome.driver", DEFAULT_CHROMEDRIVER);
        }
        ChromeOptions options = new ChromeOptions();
        driver = new ChromeDriver(options);
        driver.get(BLOGINURL);

        // The browser is redirected to the home page once the login succeeds.
        while (!BMAINPAGE.equals(driver.getCurrentUrl())) {
            Thread.sleep(PAUSE_MILLIS);
        }
        logger.info("掃碼登錄成功");

        // Build "name=value" pairs explicitly: Cookie.toString() also appends
        // path/domain/expiry attributes, which do not belong in a request header.
        Set<Cookie> bcookies = driver.manage().getCookies();
        List<String> pairs = new ArrayList<String>(bcookies.size());
        for (Cookie c : bcookies) {
            pairs.add(c.getName() + "=" + c.getValue());
        }
        return StringUtils.join(pairs, "; ");
    }

    /**
     * Fetches one page of followers.
     *
     * @param cookie value for the {@code cookie} request header
     * @param vmid   id of the user whose followers are listed
     * @param pn     page number, starting at 1
     * @param ps     page size
     * @return the entries of {@code data.list} in the response; an empty list
     *         when the HTTP status is not 200, the body is missing, or the
     *         response carries no {@code data.list}
     * @throws HttpException if the HTTP exchange fails at the protocol level
     * @throws IOException   if the request cannot be sent or the response cannot be read
     * @throws InterruptedException never thrown here; kept for signature compatibility
     */
    public static List<JSONObject> getFanS(String cookie, String vmid, int pn, int ps)
            throws InterruptedException, HttpException, IOException {
        String url = FOLLOWERS_API + "?vmid=" + vmid + "&pn=" + pn + "&ps=" + ps
                + "&order=desc&jsonp=jsonp";

        GetMethod request = newFollowersRequest(url, cookie);
        String info;
        try {
            int status = CLIENT.executeMethod(request);
            byte[] body = request.getResponseBody();
            if (status != 200 || body == null) {
                logger.warn("Followers request for page " + pn + " failed, HTTP status " + status);
                return new ArrayList<JSONObject>();
            }
            info = new String(body, UTF8);
        } finally {
            // Hand the connection back so the next page request can reuse it.
            request.releaseConnection();
        }

        JSONObject root = JSON.parseObject(info);
        JSONObject data = root == null ? null : root.getJSONObject("data");
        JSONArray fArray = data == null ? null : data.getJSONArray("list");
        if (fArray == null) {
            // Seen when the API rejects the request: the payload has no data.list.
            logger.warn("Followers response for page " + pn + " has no data.list: " + info);
            return new ArrayList<JSONObject>();
        }

        List<JSONObject> fans = new ArrayList<JSONObject>(fArray.size());
        for (int i = 0; i < fArray.size(); i++) {
            fans.add(fArray.getJSONObject(i));
        }
        return fans;
    }

    /**
     * Builds a fresh GET request carrying the browser-like headers and the cookie.
     * A new request object per page avoids accumulating duplicate headers.
     */
    private static GetMethod newFollowersRequest(String url, String cookie) throws IOException {
        GetMethod request = new GetMethod();
        request.setURI(new URI(url, true));
        request.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, "UTF-8");
        // No ":authority"/":method"/":scheme" here: those are HTTP/2 pseudo-headers,
        // not valid header names for this HTTP/1.1 client.
        request.setRequestHeader("accept", "*/*");
        request.setRequestHeader("accept-language", "zh-CN,zh;q=0.9");
        request.setRequestHeader("sec-fetch-dest", "script");
        request.setRequestHeader("sec-fetch-mode", "no-cors");
        request.setRequestHeader("sec-fetch-site", "same-site");
        request.setRequestHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36");
        request.setRequestHeader("cookie", cookie);
        return request;
    }

    /**
     * Program entry point.
     *
     * @param args optional overrides: {@code args[0]} is the user id to inspect,
     *             {@code args[1]} is the CSV output path; defaults are used when absent
     * @throws InterruptedException if interrupted while waiting for login or between pages
     * @throws HttpException        declared for compatibility; HTTP failures are logged
     */
    public static void main(String[] args) throws InterruptedException, HttpException {
        // Configure logging first so the start-up message is not dropped.
        new Main();
        logger.info("程序開始...");

        String vmid = (args != null && args.length > 0) ? args[0] : DEFAULT_VMID;
        String csvPath = (args != null && args.length > 1) ? args[1] : DEFAULT_CSV_PATH;
        String[] csvHeaders = { "mid", "粉絲名字", "粉絲簽名", "粉絲頭像" };

        CsvWriter csvWriter = null;
        try {
            String cookie = Main.getCookie();
            csvWriter = new CsvWriter(csvPath, ',', UTF8);
            csvWriter.writeRecord(csvHeaders);

            for (int pn = 1; ; pn++) {
                List<JSONObject> fans = Main.getFanS(cookie, vmid, pn, PAGE_SIZE);
                if (fans.isEmpty()) {
                    // Past the last page, or the API stopped returning data.
                    break;
                }
                for (JSONObject f : fans) {
                    if (f == null) {
                        continue;
                    }
                    String[] csvContent1 = { f.getString("mid"), f.getString("uname"),
                            f.getString("sign"), f.getString("face") };
                    System.out.println(Arrays.toString(csvContent1));
                    csvWriter.writeRecord(csvContent1);
                }
                // Small pause between pages to keep the request rate down.
                Thread.sleep(PAUSE_MILLIS);
            }
        } catch (IOException e) {
            logger.error("Failed to fetch followers or write the CSV file", e);
        } finally {
            if (csvWriter != null) {
                csvWriter.close();
            }
            if (driver != null) {
                // quit() ends the whole browser session, not just the current window.
                driver.quit();
                driver = null;
            }
        }
        logger.info("程序結束");
    }
}
注意:請求容易被攔截,而且最多只能獲取不到1000個粉絲。
感謝各位的閱讀,以上就是“Java爬蟲之如何實現B站粉絲取關人排查”的內容了,經過本文的學習后,相信大家對Java爬蟲之如何實現B站粉絲取關人排查這一問題有了更深刻的體會,具體使用情況還需要大家實踐驗證。這里是億速云,小編將為大家推送更多相關知識點的文章,歡迎關注!
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。