亚洲激情专区-91九色丨porny丨老师-久久久久久久女国产乱让韩-国产精品午夜小视频观看

溫馨提示×

溫馨提示×

您好,登錄后才能下訂單哦!

密碼登錄×
登錄注冊×
其他方式登錄
點擊 登錄注冊 即表示同意《億速云用戶服務條款》

使用chrome-har導出瀏覽器HAR數據

發布時間:2020-07-31 21:38:50 來源:網絡 閱讀:1551 作者:mybabe0312 欄目:軟件技術

這里使用 Node.js 下的 puppeteer-har 庫(其內部基于 chrome-har)配合 puppeteer 來導出瀏覽器的 HAR 數據,經驗證效果不錯,比較靠譜。

1,創建日志配置(ultra-harlog/module/log.js)

//cnpm install --save log4js
const log4js = require('log4js');

/**
 * Build the definition of a date-based rolling file appender.
 *
 * All file appenders in this module share the same pattern/encoding and
 * differ only in their base file name.
 *
 * @param {string} filename - Base path of the log file, e.g. 'logs/puppeteer/log'.
 * @returns {object} A log4js `dateFile` appender definition.
 */
function dateFileAppender(filename) {
    return {
        type: 'dateFile',
        filename: filename,
        pattern: '-yyyy-MM-dd.log',
        alwaysIncludePattern: true,
        encoding: 'utf-8'
    };
}

const options = {
    appenders: {
        console: {
            type: "console"
        },
        "puppeteer-record": dateFileAppender('logs/puppeteer/log'),
        "puppeteer-har-record": dateFileAppender('logs/puppeteerhar/log'),
        "puppeteer-harevent-record": dateFileAppender('logs/puppeteerharevent/log')
    },
    categories: {
        // Fallback for any logger name without a category of its own:
        // unchanged from before, it fans out to every appender.
        "default": { "appenders": ['console', "puppeteer-record", "puppeteer-har-record", "puppeteer-harevent-record"], "level": "all" },
        // log4js resolves getLogger(name) by category name. Without the entries
        // below, every named logger used "default" and the three dated files
        // received identical content. Each file-backed logger still echoes to
        // the console so interactive output is preserved.
        "console": { "appenders": ['console'], "level": "all" },
        "puppeteer-record": { "appenders": ['console', "puppeteer-record"], "level": "all" },
        "puppeteer-har-record": { "appenders": ['console', "puppeteer-har-record"], "level": "all" },
        "puppeteer-harevent-record": { "appenders": ['console', "puppeteer-harevent-record"], "level": "all" }
    }
};
log4js.configure(options);

/**
 * Look up the log4js logger for the 'console' category.
 *
 * @returns {object} Whatever log4js.getLogger('console') returns.
 */
function getConsoleLogger(){
    return log4js.getLogger('console');
}

/**
 * Look up the log4js logger for the 'puppeteer-record' category.
 *
 * @returns {object} Whatever log4js.getLogger('puppeteer-record') returns.
 */
function getPuppeteerRecordLogger(){
    return log4js.getLogger('puppeteer-record');
}

/**
 * Look up the log4js logger for the 'puppeteer-har-record' category.
 *
 * @returns {object} Whatever log4js.getLogger('puppeteer-har-record') returns.
 */
function getPuppeteerHarRecordLogger(){
    return log4js.getLogger('puppeteer-har-record');
}

/**
 * Look up the log4js logger for the 'puppeteer-harevent-record' category.
 *
 * @returns {object} Whatever log4js.getLogger('puppeteer-harevent-record') returns.
 */
function getPuppeteerHarEventRecordLogger(){
    return log4js.getLogger('puppeteer-harevent-record');
}

// Public API of the logging module: one accessor per logger name.
Object.assign(exports, {
    getConsoleLogger,
    getPuppeteerRecordLogger,
    getPuppeteerHarRecordLogger,
    getPuppeteerHarEventRecordLogger
});

2,創建抓取的代碼(ultra-harlog/module/puppeteerhar.js)

const puppeteer = require('puppeteer');
const PuppeteerHar = require('puppeteer-har');
const path = require("path");

const logger=require("./log");
const grpcclient=require("./grpcclient");

const log = logger.getPuppeteerHarRecordLogger() ;

/**
 * Launch the browser instance used for HAR capture.
 *
 * @returns {Promise<object>} Resolves to the browser returned by puppeteer.launch().
 */
async function launchBrowser(){
  // Chromium command-line switches; see
  // https://peter.sh/experiments/chromium-command-line-switches/
  const chromiumFlags = [
    '--disk-cache-size=0',
    '--disable-cache',
    '--disable-infobars',
    '--window-size=800,600',
    '--ignore-certificate-errors',
    // NOTE(review): looks like a misspelling of '--enable-features'; kept verbatim — confirm intent.
    '--enable-feaures'
  ];

  const launchOptions = {
    // A manually downloaded Chromium can be selected by adding `executablePath`
    // here; it is left unset, so Puppeteer's own default applies.
    // Do not fail navigation on HTTPS certificate errors.
    ignoreHTTPSErrors: true,
    // Headless mode: no visible browser window is opened.
    headless: true,
    args: chromiumFlags,
    // Do not auto-open a DevTools panel per tab (enabling it would force headless off).
    devtools: false,
    // Launch timeout defaults to 30000 ms; 0 disables it.
    timeout: 0
    // `slowMo` (e.g. 250) can be added to slow operations down while debugging.
  };

  const browser = await puppeteer.launch(launchOptions);
  return browser;
}

/**
 * Load `url` in a freshly launched browser and record the traffic as a HAR
 * file at `<dirPath>/<filename>`.
 *
 * Errors raised while preparing the page, starting the recorder or navigating
 * are logged and NOT rethrown (best-effort capture). Errors from launching the
 * browser, from `har.stop()` and from `browser.close()` still propagate to the
 * caller, as before.
 *
 * @param {string} url - Site to capture. "http://" is prepended when the value
 *   does not already start with "http://" or "https://".
 * @param {string} dirPath - Directory the HAR file is written into.
 * @param {string} filename - Name of the HAR file inside `dirPath`.
 * @param {number} [settleDelayMs=1000] - Pause, in milliseconds, between
 *   obtaining the page and starting the recording.
 * @returns {Promise<void>}
 */
async function saveHarlog(url, dirPath, filename, settleDelayMs = 1000){
  // Destination of the HAR file.
  const harFilePath = path.join(dirPath, filename);

  // Normalise into a new variable instead of overwriting the parameter, so the
  // caller-supplied value stays available (e.g. for a future RPC notification).
  const hasScheme = url.startsWith('http://') || url.startsWith('https://');
  const targetUrl = hasScheme ? url : "http://" + url;

  const browser = await launchBrowser();

  let har;
  let harStarted = false;
  try{
    // Reuse the tab the browser opened at launch instead of creating a new page.
    // The default viewport is 800x600; page.setViewport() can change it.
    const page = (await browser.pages())[0];

    // Plain timer instead of page.waitFor(), which newer Puppeteer releases
    // deprecate/remove; a timer works on every version.
    await new Promise((resolve) => setTimeout(resolve, settleDelayMs));

    har = new PuppeteerHar(page);
    await har.start({ path: harFilePath });
    harStarted = true;

    // Equivalent to typing the address into the browser and pressing Enter;
    // timeout 0 disables the navigation timeout.
    await page.goto(targetUrl, {
      timeout: 0
    });

    // frame.title() returns a Promise, so it must be awaited before logging;
    // frame.url() is synchronous.
    log.info(await page.mainFrame().title());
    log.info(page.mainFrame().url());

    // TODO: optionally notify the Java side (grpcclient.resovleHarLog) that the
    // HAR file is ready, passing `url`, `filename` and `dirPath`.
  }catch(error){
    log.error('resovle error :' + targetUrl + ";  error message:" + error) ;
  }finally{
    try{
      // Only stop a recorder that actually started; stopping flushes the HAR
      // to disk even when navigation failed.
      if(harStarted){
        await har.stop();
      }
    }finally{
      // Always release the browser, even when har.stop() throws.
      await browser.close();
    }
  }
}
// Public API of the capture module.
Object.assign(exports, {
  launchBrowser,
  saveHarlog
});

3,創建啟動文件(ultra-harlog/puppeteerhar-app.js)

const fs = require("fs");
const path = require("path");
const moment = require("moment");
const schedule = require('node-schedule');

const cvsresovler=require("./module/cvsresovle");
const mhar=require("./module/puppeteerhar");

/*
cnpm install --save moment
cnpm install --save csv
cnpm install --save node-schedule
cnpm install --save puppeteer
cnpm install --save puppeteer-har
cnpm install --save iconv-lite
cnpm install --save chrome-har

cnpm install --save grpc

*/  
/**
 * Register the scheduled HAR capture job.
 *
 * Each run creates `<__dirname>/harlogs/<YYYYMMDDHHmm>/`, reads the URL list
 * from `<__dirname>/top300.csv` via `cvsresovler.readUrlRecord`, and captures
 * every row's `SITE_LINK` one after another with `mhar.saveHarlog`, writing
 * `N-<sanitised url>.har` per site. Rows without a usable `SITE_LINK` are
 * skipped, and a failure on one site does not stop the remaining ones.
 *
 * @returns {void}
 */
function init(){
    console.log('初始化調度器') ;
    // Six-field cron expression (second minute hour day-of-month month day-of-week):
    // this fires once a day at 10:14:00.
    schedule.scheduleJob('0 14 10 * * *',()=>{
        const ftime = moment().format('YYYYMMDDHHmm');
        console.log('當前調度時間為:' + ftime) ;
        const dirPath = path.join(__dirname,'harlogs',ftime) ;
        console.log("創建目錄:" + dirPath) ;

        const isExist = fs.existsSync(dirPath) && fs.lstatSync(dirPath).isDirectory();
        if(!isExist){
            console.log("創建文件夾" + ftime) ;
            // recursive: also creates the parent "harlogs" directory on the first run.
            fs.mkdirSync(dirPath, { recursive: true });
        }

        // Load the list of sites to capture.
        const dataArr = cvsresovler.readUrlRecord(path.join(__dirname,'top300.csv')) ;
        console.log("解析出URL共計" + dataArr.length + "條") ;

        // Capture strictly one site at a time: `await` inside a plain loop keeps
        // the run sequential, so only one browser is open at any moment.
        const captureAll = async () => {
            for(let i = 0; i < dataArr.length; i++){
                const data = dataArr[i];
                const rawUrl = data ? data['SITE_LINK'] : undefined;
                const url = typeof rawUrl === 'string' ? rawUrl.trim() : '';
                if(!url){
                    continue;
                }
                // Path separators are not allowed in a file name; replace them.
                const filename = url.replace(/\//g,'-').replace(/\\/g,'-') + '.har' ;
                console.log((i+1) + "-starting to resovle url :" + url ) ;
                try{
                    await mhar.saveHarlog(url,dirPath,"N" + "-" + filename) ;
                }catch(error){
                    console.log(error) ;
                }
            }
        };
        // Returned so the run can be awaited by whoever invokes the job; every
        // per-site error is handled inside the loop.
        return captureAll();
    });
    console.log('應用程序啟動完成') ;
}
//執行
//init();

/**
 * Manual smoke test: capture a single site into
 * `<__dirname>/harlogs/<YYYYMMDDHHmm>/NT-<sanitised url>.har`.
 *
 * The site is taken from the first command-line argument and defaults to
 * "www.baidu.com". Errors from `mhar.saveHarlog` are printed, not rethrown;
 * errors from creating the output directory reject the returned promise.
 *
 * @returns {Promise<void>}
 */
async function test(){
    const ftime = moment().format('YYYYMMDDHHmm');
    console.log('當前執行時間為:' + ftime) ;
    const dirPath = path.join(__dirname,'harlogs',ftime) ;
    console.log("創建目錄:" + dirPath) ;

    const isExist = fs.existsSync(dirPath) && fs.lstatSync(dirPath).isDirectory();
    if(!isExist){
        console.log("創建文件夾" + ftime) ;
        // recursive: also creates the parent "harlogs" directory on the first run.
        fs.mkdirSync(dirPath, { recursive: true });
    }

    // slice() instead of splice(): read the CLI arguments without mutating
    // process.argv. The local is not called `arguments`, which would shadow the
    // built-in and is a syntax error in strict-mode code.
    const cliArgs = process.argv.slice(2);
    const url = (cliArgs.length > 0 ? cliArgs[0] : "www.baidu.com").trim();
    if(!url){
        return;
    }

    // Path separators are not allowed in a file name; replace them.
    const filename = url.replace(/\//g,'-').replace(/\\/g,'-') + '.har' ;
    console.log("starting to resovle test url :" + url ) ;
    try{
        await mhar.saveHarlog(url,dirPath,"NT" + "-" + filename) ;
    }catch(error){
        console.log(error) ;
    }
}
// Run the smoke test. test() only guards the capture call itself, so report
// anything else it rejects with (e.g. a directory-creation failure) instead of
// leaving the promise unhandled.
test().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});

關于GRPC部分的代碼,請參考我另外一篇博文

參考地址:https://michaljanaszek.com/blog/generate-har-with-puppeteer

向AI問一下細節

免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。

AI

讷河市| 岳阳市| 高阳县| 白山市| 绥化市| 河间市| 砚山县| 额敏县| 扬中市| 台江县| 历史| 河南省| 稷山县| 休宁县| 兴山县| 柏乡县| 抚顺县| 云和县| 江西省| 乐昌市| 永德县| 尼玛县| 永福县| 甘洛县| 依安县| 四子王旗| 柘荣县| 女性| 商水县| 合水县| 康定县| 连云港市| 龙南县| 白银市| 道孚县| 安阳县| 茌平县| 临潭县| 海兴县| 郧西县| 阳新县|