一、xxxxxx获取指定任务爬取的所有url的接口

接口名称:xxxxxx获取指定任务爬取的所有url的接口

访问链接:  http://IP:PORT/crwalTask/findUrlExceptionById?ctId=ctIdVal&time=timeVal&limit=limitVal

传入参数类型:ctId 和 time 为 String,limit 为 int
参数内容:  
图片描述

返回类型:JSONArray
返回内容:  
图片描述

调用方法Demo 

图片描述

图片描述

 /**
  * Demo entry point: queries the crawler interface for one task and prints
  * whatever {@code httpRequest} returns to standard output.
  *
  * @param args command-line arguments (unused)
  */
 public static void main(String[] args) throws Exception {
     // Task id to query.
     final String taskId = "ba716af7-105c-481b-bf28-2e9231529947";
     // Address of the crawler interface; ctIdVal / timeVal / limitVal are
     // placeholders that httpRequest substitutes with the real values.
     final String endpointTemplate = "http://192.168.1.105:8080/crwalTask/findUrlExceptionById?ctId=ctIdVal&time=timeVal&limit=limitVal";
     System.out.println(httpRequest(endpointTemplate, taskId, SelectUtil.time, SelectUtil.number));
 }
 
 /**
  * Filter values used by the demo call to the crawler interface.
  */
 public class SelectUtil {
     /** Date filter as written by the user, before spaces are substituted. */
     private static final String RAW_TIME = "2018-03-05";

     /**
      * Time filter, formatted as "yyyy-MM-dd" or "yyyy-MM-dd HH:mm:ss".
      * Every space is replaced by '=' (presumably so that the value can be
      * placed in a URL query string without a raw space; confirm against the server).
      */
     public static final String time = RAW_TIME.replace(' ', '=');

     /** Maximum number of records to query. */
     public static final int number = 162;
 }
 /**
      * Fetches the URL information crawled by the given task.
      *
      * <p>The placeholders {@code ctIdVal}, {@code timeVal} and {@code limitVal}
      * in {@code req_url} are replaced with the supplied values, the resulting
      * URL is requested with the POST method (no request body is written), and
      * the UTF-8 response text is parsed into a {@code JSONArray}.
      *
      * @param req_url URL template of the interface, containing the three placeholders
      * @param ctId id of the task to query
      * @param time time filter
      * @param limit maximum number of records to query
      * @return the parsed response; the array {@code ["exception","查询的记录数超过240"]}
      *         when the response body is empty; {@code null} when any exception
      *         occurs while connecting, reading or parsing (its message is
      *         printed to standard output)
      */
     public static JSONArray httpRequest(String req_url,String ctId,String time,int limit) {
         // Fill the placeholders of the URL template with the actual query values.
         String requestUrl = req_url.replace("ctIdVal", ctId)
                 .replace("timeVal", time)
                 .replace("limitVal", String.valueOf(limit));

         JSONArray jsonArray = null;
         HttpURLConnection httpUrlConn = null;
         try {
             httpUrlConn = (HttpURLConnection) new URL(requestUrl).openConnection();

             // Read-only exchange: all parameters travel in the query string.
             httpUrlConn.setDoOutput(false);
             httpUrlConn.setDoInput(true);
             httpUrlConn.setUseCaches(false);

             httpUrlConn.setRequestMethod("POST");
             httpUrlConn.connect();

             // Collect the response text line by line (line terminators are dropped).
             // try-with-resources closes the reader and stream even when reading fails.
             StringBuilder body = new StringBuilder();
             try (InputStream inputStream = httpUrlConn.getInputStream();
                  BufferedReader bufferedReader =
                          new BufferedReader(new InputStreamReader(inputStream, "utf-8"))) {
                 String line;
                 while ((line = bufferedReader.readLine()) != null) {
                     body.append(line);
                 }
             }

             String payload = body.toString();
             if (payload.isEmpty()) {
                 // An empty body is reported to the caller as an "exception" array.
                 jsonArray = JSONArray.fromObject("[\"exception\",\"查询的记录数超过240\"]");
             } else {
                 jsonArray = JSONArray.fromObject(payload);
             }
         } catch (Exception e) {
             // Best-effort contract kept from the original: report and return null.
             System.out.println(e.getMessage());
         } finally {
             // Release the connection on every path, not only on success.
             if (httpUrlConn != null) {
                 httpUrlConn.disconnect();
             }
         }

         return jsonArray;
     }

View Code
需要的Jar包:

  commons-beanutils-1.9.3.jar

  commons-collections-3.2.2.jar

  commons-lang-2.6.jar

  commons-logging-1.2.jar

  ezmorph-1.0.6.jar

  json-lib-2.4-jdk15.jar

Sql脚本  

  -- Each statement is terminated with ';' so the script can be run as one batch.

  -- urlpathmapper: error information recorded for a URL run.
  ALTER TABLE urlpathmapper ADD COLUMN exceptionInfo varchar(2048) COMMENT 'URL运行错误信息';

  -- urlpathmapper: title of the crawled page.
  ALTER TABLE urlpathmapper ADD COLUMN title varchar(256) COMMENT '爬取标题';

  -- crawltaskmanage: whether the file is verified (per the column comment: 0 = yes, 1 = no).
  ALTER TABLE crawltaskmanage ADD COLUMN checkFile varchar(8) COMMENT '文件是否校验0是1否';

  -- crawltaskmanage: comparison value for the SimHash duplication check.
  ALTER TABLE crawltaskmanage ADD COLUMN SimHashValue int(8) COMMENT 'SimHash算法重复度比较值';