黑匣子
满天星
Fork me on GitHub

通过JAVA中的httpclient刷取CSDN博客访问量

config.properties

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Blog home page whose article links are crawled.
The_home_page=https://blog.csdn.net/dataiyangu
# Comma-separated host:port proxy list. Do not put whitespace before the
# comma: java.util.Properties keeps it, and it ends up inside the port value.
IP_And_Port=171.41.81.5:9999,\
218.60.8.98:3129,\
218.60.8.99:3129,\
113.200.56.13:8010,\
171.41.82.89:9999,\
1.48.150.228:8118,\
171.41.82.169:9999,\
115.151.2.133:9999,\
59.62.167.5:9999,\
110.52.235.121:9999,\
124.235.135.87:80,\
171.41.86.55:9999,\
125.123.141.4:9999,\
111.177.179.9:9999,\
121.61.3.130:9999,\
116.209.56.53:9999,\
110.52.235.241:9999,\
124.206.234.126:3128,\
110.52.235.150:9999,\
218.204.204.90:8118,\
125.123.139.235:9999,\
116.209.58.107:9999,\
116.209.57.207:9999,\
121.61.27.200:9999,\
110.52.235.146:9999,\
121.254.214.219:80,\
66.70.147.197:3128,\
152.231.81.122:53281,\
91.134.137.31:8118,\
71.13.112.152:3128,\
223.93.172.248:3128,\
218.60.8.98:3129,\
218.207.212.86:80,\
218.60.8.99:3129,\
205.204.248.88:9090,\
109.236.89.172:1080,\
66.119.180.101:80,\
113.200.56.13:8010,\
120.52.73.1:80,\
66.119.180.103:80,\
70.29.69.120:80,\
66.119.180.104:80,\
212.237.33.61:3128,\
205.204.248.76:9090,\
94.130.14.146:31288,\
54.39.40.100:80,\
103.205.26.120:80,\
51.254.92.205:1080

CSDN

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.util.HashSet;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;


public class CSDN {
public static Properties getProperties() {
Properties properties = new Properties();
InputStream resourceAsStream = CSDN.class.getResourceAsStream("config.properties");
try {
properties.load(resourceAsStream);
} catch (IOException e) {
e.printStackTrace();
}
return properties;
}
private String zhuye;
private String sousuo = "/article/details/";
public CSDN() {
zhuye = getProperties().getProperty("The_home_page");
}
public CSDN(String url) {
zhuye = url;
}
public String getZhuye() {
return zhuye;
}

public void setZhuye(String zhuye) {
this.zhuye = zhuye;
}
public String getSousuo() {
return sousuo;
}
public void setSousuo(String sousuo) {
this.sousuo = sousuo;
}
public String open(String url) {
StringBuffer str = new StringBuffer();
BufferedReader in = null;
try {
URL u = new URL(url);
try {
/*原来的代码*/
// in = new BufferedReader(new InputStreamReader(u.openStream(), "UTF-8"));
HttpURLConnection htpcon = (HttpURLConnection) u.openConnection();
htpcon.setRequestMethod("GET");
htpcon.setDoOutput(true);
htpcon.setDoInput(true);
htpcon.setUseCaches(false);
//没有看到调用的地方,应该是在将之前的连接动态的排除异常
htpcon.setConnectTimeout(10000);
htpcon.setReadTimeout(10000);
InputStream inputStream = htpcon.getInputStream();
in = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
} catch (SocketTimeoutException e) {
System.out.println("本次请求超时了,别要慌张。");
return "超时了";
}
while (true) {
if (in != null) {
String s = in.readLine();
if (s == null) break;
else str.append(s);
}
}
} catch (SocketException e) {
System.out.println("上面的这个代理失效了,请更换。");
} catch (IOException e) {
System.out.println("上面的这个代理失效了,请更换。");
} finally {
try {
if (in != null) in.close();
} catch (IOException e) {
e.printStackTrace();
}
}
return str.toString();
}

public HashSet<String> sousuoHTML(String str) {
HashSet<String> set = new HashSet<String>();
int st, end;
while ((st = str.indexOf(zhuye + sousuo)) != -1) {
if ((end = str.indexOf("\"", st)) != -1) {
String s = str.substring(st, end);
if (s.indexOf("#comments") != -1) {
s = s.substring(0, s.indexOf("#comments"));
}
set.add(s);
str = str.substring(end);
}
}
return set;
}

public int getFangke() {
String str = open(zhuye);
int i;
if ((i = str.indexOf("访问:")) != -1) {
str = str.substring(i);
str = str.substring(str.indexOf("\"") + 1);
str = str.substring(0, str.indexOf("\""));
} else if ((i = str.indexOf("personal_list")) != -1) {
str = str.substring(i);
str.substring(str.indexOf("<em>") + 4, str.indexOf("</em>"));
}
int ii = 0;
try {
ii = Integer.parseInt(str);
} catch (NumberFormatException e) {
}
return ii;
}

public void daili(String ip, String dk) {
Properties prop = System.getProperties();
// 设置http访问要使用的代理服务器的地址
prop.setProperty("http.proxyHost", ip);
// 设置http访问要使用的代理服务器的端口
prop.setProperty("http.proxyPort", dk);
// 设置不需要通过代理服务器访问的主机,可以使用*通配符,多个地址用|分隔
prop.setProperty("http.nonProxyHosts", "localhost|192.168.168.*");
// 设置安全访问使用的代理服务器地址与端口
// 它没有https.nonProxyHosts属性,它按照http.nonProxyHosts 中设置的规则访问
prop.setProperty("https.proxyHost", ip);
prop.setProperty("https.proxyPort", dk);
// 使用ftp代理服务器的主机、端口以及不需要使用ftp代理服务器的主机
prop.setProperty("ftp.proxyHost", ip);
prop.setProperty("ftp.proxyPort", dk);
prop.setProperty("ftp.nonProxyHosts", "localhost|192.168.168.*");
// socks代理服务器的地址与端口
prop.setProperty("socksProxyHost", ip);
prop.setProperty("socksProxyPort", dk);
System.out.println("即将开始代理进行访问 ip:" + ip + " port:" + dk);
}

public static String[] dl = getProperties().getProperty("IP_And_Port").split(",");

static class Main_thread implements Runnable {
public void run() {
AtomicInteger atomicInteger = new AtomicInteger();
atomicInteger.set(1);
int i = 0;
CSDN csdn = new CSDN();
while (true) {
System.out.println("当前博客访问量:" + csdn.getFangke()+"");
long a = System.currentTimeMillis();
for (i = 0; i < dl.length; i++) {
String[] dd = dl[i].split(":");
csdn.daili(dd[0], dd[1]);
HashSet<String> set = null;
try {
set = csdn.sousuoHTML(csdn.open(csdn.getZhuye()));
} catch (Exception e) {
System.out.println("上面的这个代理失效了,请更换。。。");
}
try {
for (String url : set) {
csdn.open(url);
System.out.println("正在打开:" + url);
}
} catch (NullPointerException e) {
}
}
System.out.println("---------------------------------------------------------------------------");
System.out.println(" ");
System.out.println("所有的代理已经访问:" + atomicInteger.getAndIncrement()+"次");
if (csdn.getFangke() != 0) {
System.out.println("当前博客访问量:" + csdn.getFangke()+"");
}
long b = System.currentTimeMillis();
long c = b - a;
System.out.println("本次代理请求耗时:"+c+"秒");
if(c>10000) {
try {
double v = Math.random() * 10;
System.out.println("即将休息:"+(long) (v*1000)+"毫秒");
Thread.sleep((long) (v*1000));
System.out.println("休息完成,即将开始下轮访问。");
System.out.println(" ");
System.out.println("---------------------------------------------------------------------------");
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
}
}

public static void main(String[] args) {
ExecutorService executorService = Executors.newFixedThreadPool(1 );
Main_thread main_thread = new Main_thread();
for (int i = 0; i < 1; i++) {
executorService.execute(main_thread);
}
}
}

用法

只需要将上面两个文件放在java项目中的同一目录下,修改配置文件中的博客地址和代理ip、port即可。
然后将项目打成jar包,放到自己服务器上

1
nohup java -jar xxx.jar > csdn.log 2>&1 &

-------------The End-------------

本文标题:通过JAVA中的httpclient刷取CSDN博客访问量

文章作者:Leesin.Dong

发布时间:2019年02月08日 - 19:02

最后更新:2019年02月09日 - 16:02

原始链接:http://mmmmmm.me/2019-02-08.html

许可协议: 署名-非商业性使用-禁止演绎 4.0 国际 转载请保留原文链接及作者。

客官客官,不可以,你要对我负责~~~