Ⅰ java程序读取一个url页面的源代码
传入一个url,返回源代码; public static String getHTML(String url){// 获取指定URL的网页,返回网页内容的字符串,然后将此字符串存到文件即可 try { URL newUrl = new URL(url); URLConnection connect = newUrl.openConnection(); connect.setRequestProperty("User-Agent","Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)"); DataInputStream dis = new DataInputStream(connect.getInputStream()); BufferedReader in = new BufferedReader(new InputStreamReader(dis,"UTF-8")); String html = ""; String readLine = null; while((readLine = in.readLine()) != null) { html = html + readLine; } in.close(); return html; }catch (MalformedURLException me){ System.out.println("MalformedURLException" + me); }catch (IOException ioe){ System.out.println("ioeException" + ioe); } return null; }
Ⅱ java中读取网页源代码时,使用readline函数的问题
readLine() 是阻塞式的:程序每次执行到这一行,都要等读到一整行(遇到换行符或流结束)之后,才会继续执行后面的代码。
readLine 是阻塞式调用,这一点无法改变;读取卡住通常也不是你的代码的问题,而是对方服务器或者网络的问题。我们能做的只是设置一个超时(timeout),时间过了就提示读取失败;也可以试试 Apache 的 HttpClient。
Ⅲ 求一份基于JAVA的即时通讯软件的源码,必须要完整的放在eclipse下可直接运行的,最好不要有错误,简单就行
商品社会拒绝白干,第一,没有人会给你这个程序。第二,就算有人给你,他也不会耐心告诉你怎么配置,怎么装数据库。怎么运行。 随随便便来这里就想白要,你觉得可能吗。我说话可能不好听,但是这是事实。
Ⅳ 设定一个程序 下载由url指定的网页源代码 指出其中所有超链接
/**
 * Downloads a page and lists the {@code http://} links found in it.
 */
public class TestReg {
    // Alternative expression that captures whole anchor tags:
    // "<[aA]\\s*(href=[^>]+)>(.*?)</[aA]>"
    /** Regex source: an {@code http://} URL that is followed by a double quote. */
    public static String patternString1 = "(http://[^>]+)\"";

    /** Compiled once so that frequent calls do not recompile the expression. */
    public static Pattern pattern1 = Pattern.compile(patternString1, Pattern.DOTALL);

    /**
     * Prints every link found on the sample page.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Sample input
        String ss = "http://music..com/song/602998?fm=altg5";
        List<String> urls = getWebCon(ss);
        for (String url : urls) {
            System.out.println(url);
        }
    }

    /**
     * Returns every link found in {@code var}, concatenated without a separator.
     *
     * @param var text to scan, typically one line of HTML
     * @return the concatenated links, or an empty string when there is none
     */
    public static String parseUrl(String var) {
        StringBuffer sb = new StringBuffer();
        for (String link : extractUrls(var)) {
            sb.append(link);
        }
        return sb.toString();
    }

    /** Collects each match of {@link #pattern1} in {@code text}, cut at its first double quote. */
    private static List<String> extractUrls(String text) {
        List<String> links = new ArrayList<String>();
        Matcher matcher = pattern1.matcher(text);
        while (matcher.find()) {
            // The whole match ends with a quote and may span further attributes;
            // the URL itself is everything before the first quote.
            String hit = matcher.group();
            links.add(hit.substring(0, hit.indexOf('"')));
        }
        return links;
    }

    /**
     * Reads the page at {@code domain} as UTF-8 and returns the links it contains,
     * one list entry per link.
     *
     * @param domain URL of the page to read
     * @return the links in page order; empty when nothing matched or the read failed
     */
    public static List<String> getWebCon(String domain) {
        List<String> sb = new ArrayList<String>();
        try {
            java.net.URL url = new java.net.URL(domain);
            // Decode as UTF-8 while reading; the reader is closed even if a read fails.
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(url.openStream(), "UTF-8"))) {
                String line;
                while ((line = in.readLine()) != null) {
                    sb.addAll(extractUrls(line));
                }
            }
        } catch (Exception e) {
            System.err.println(e);
        }
        return sb;
    }
}
Ⅴ c#为什么str = sr.ReadLine();未能读取整行内容
分隔时出错,所以"123"没有被读取。解决方法:
1.把文本每行里的空格改为1个空格(读取时注意编码),比如:张三 123
2.用正则获取。
还有,你应该是先把流close再return的,不然执行不了流的close.
Ⅵ perl运行时出现报错下面是源代码和报错内容,
首先你将这两句
open IN,"1CHR.txt";
open OUTemp1,'>',"OutPDB.txt";
改成
# Three-argument open with an explicit mode; $! adds the OS error text so the
# failure reason is actually reported.
open IN, '<', "1CHR.txt" or die "Can't read file: $!";
open OUTemp1, '>', "OutPDB.txt" or die "Can't create OUT: $!";
看看有没有报出甚么错 ?
Ⅶ java.lang.NullPointerException at java.util.Properties$LineReader.readLine(Unknown Source) at java.
找不到配置.properties文件的位置。
Ⅷ java中如何根据一个网址获得该网页的源代码,急求
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Downloads a web page over HTTP and prints its source to standard output.
 */
public class HttpTest {
    /** Address of the page to download. */
    String urlString;

    /**
     * Downloads the page named by the first command-line argument.
     *
     * @param args {@code args[0]} is the URL of the page
     * @throws Exception if the URL is malformed or the download fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Usage: java HttpTest <url>");
            return;
        }
        HttpTest client = new HttpTest(args[0]);
        client.run();
    }

    /**
     * @param urlString address of the page to download
     */
    public HttpTest(String urlString) {
        this.urlString = urlString;
    }

    /**
     * Opens the URL, reads the response line by line as UTF-8 and prints each line.
     *
     * @throws Exception if the URL is malformed or the connection or read fails
     */
    public void run() throws Exception {
        // Build the URL object
        URL url = new URL(urlString);
        // Open the connection
        HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();
        // The input stream carries the page content; the reader is closed automatically.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(urlConnection.getInputStream(), "UTF-8"))) {
            String line;
            // Read the stream and echo it
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        } finally {
            urlConnection.disconnect();
        }
    }
}
Ⅸ 求朴素贝叶斯算法源码
ICTCLAS中文分词for Lucene.Net接口代码(实现Analyzer):
1using System;
2using System.Collections.Generic;
3using System.Text;
4using System.IO;
5
6using Lucene.Net.Analysis;
7using Lucene.Net.Analysis.Standard;
8
namespace AspxOn.Search.FenLei
{
    /// <summary>
    /// Lucene.Net analyzer that tokenizes with the ICTCLAS word segmenter.
    /// </summary>
    public class ICTCLASAnalyzer : Analyzer
    {
        /// <summary>Words to filter out; filled from <see cref="NoisePath"/> by the constructor.</summary>
        public static readonly System.String[] CHINESE_ENGLISH_STOP_WORDS = new string[428];

        /// <summary>Location of the stop-word file, one word per line.</summary>
        public string NoisePath = Environment.CurrentDirectory + "\\data\\stopwords.txt";

        /// <summary>
        /// Loads the stop words from <see cref="NoisePath"/>. Reading stops at the first
        /// empty line, at end of file, or when the array is full.
        /// </summary>
        public ICTCLASAnalyzer()
        {
            // "using" disposes the reader (and the file handle) even if a read throws.
            using (StreamReader reader = new StreamReader(NoisePath, System.Text.Encoding.Default))
            {
                string noise = reader.ReadLine();
                int i = 0;

                // The length check keeps a longer file from overrunning the fixed-size array.
                while (!string.IsNullOrEmpty(noise) && i < CHINESE_ENGLISH_STOP_WORDS.Length)
                {
                    CHINESE_ENGLISH_STOP_WORDS[i] = noise;
                    noise = reader.ReadLine();
                    i++;
                }
            }
            // NOTE(review): slots beyond the last word read stay null; confirm StopFilter tolerates null entries.
        }

        /// <summary>
        /// Builds an <c>ICTCLASTokenizer</c> over the reader and filters it through
        /// <c>StandardFilter</c>, <c>LowerCaseFilter</c> and <c>StopFilter</c>.
        /// </summary>
        /// <param name="fieldName">Field name; not used by this implementation.</param>
        /// <param name="reader">Source of the text to analyze.</param>
        /// <returns>The filtered token stream.</returns>
        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            TokenStream result = new ICTCLASTokenizer(reader);
            result = new StandardFilter(result);
            result = new LowerCaseFilter(result);
            result = new StopFilter(result, CHINESE_ENGLISH_STOP_WORDS);
            return result;
        }
    }
}
ICTCLAS中文分词for Lucene.Net接口代码(实现Tokenizer):
1using System;
2using System.Collections.Generic;
3using System.Text;
4
5using Lucene.Net.Analysis;
6using SharpICTCLAS;
7using System.IO;
8
namespace AspxOn.Search.FenLei
{
    /// <summary>
    /// Tokenizer that reads its whole input up front, segments it with SharpICTCLAS
    /// and then hands the segments out one at a time.
    /// </summary>
    public class ICTCLASTokenizer : Tokenizer
    {
        int nKind = 1;
        List<WordResult[]> result;
        int startIndex = 0;
        int endIndex = 0;
        // Cursor into result[0]; it starts at 1 and Next() stops before the last
        // entry, so the first and last entries are never emitted.
        int i = 1;

        /// <summary>The sentence to be segmented.</summary>
        private string sentence;

        /// <summary>Constructs a tokenizer for this reader and segments its content immediately.</summary>
        /// <param name="reader">Source of the text to segment.</param>
        public ICTCLASTokenizer(System.IO.TextReader reader)
        {
            this.input = reader;
            sentence = input.ReadToEnd().Replace("\r\n", "");

            string dictionaryDir = Path.Combine(Environment.CurrentDirectory, "Data") + Path.DirectorySeparatorChar;
            WordSegment segmenter = new WordSegment();
            segmenter.InitWordSegment(dictionaryDir);
            result = segmenter.Segment(sentence, nKind);
        }

        /// <summary>
        /// Returns the next token, or null once the segments are exhausted.
        /// </summary>
        public override Token Next()
        {
            WordResult[] words = result[0];
            if (i >= words.Length - 1)
            {
                return null;
            }

            string term = words[i].sWord;
            // Offsets are inclusive: the token covers startIndex..endIndex.
            endIndex = startIndex + term.Length - 1;
            Token next = new Token(term, startIndex, endIndex);
            startIndex = endIndex + 1;
            i++;
            return next;
        }
    }
}
中文分词器代码:
1using System;
2using System.Collections.Generic;
3using System.Text;
4using System.IO;
5
6using Lucene.Net.Analysis;
7using Lucene.Net.Analysis.Standard;
8using Lucene.Net.Documents;
9
10using Lucene.Net.Analysis.Cn;
11using Lucene.Net.Analysis.KTDictSeg;
12
namespace AspxOn.Search.FenLei
{
    /// <summary>
    /// Chinese word splitter.
    /// </summary>
    public class ChineseSpliter
    {
        /// <summary>
        /// Segments <paramref name="text"/> with <c>ICTCLASAnalyzer</c> and joins the
        /// resulting terms with <paramref name="splitToken"/>.
        /// </summary>
        /// <param name="text">Text to segment.</param>
        /// <param name="splitToken">Separator placed between terms; may be any length.</param>
        /// <returns>The joined terms, or an empty string when no token is produced.</returns>
        public static string Split(string text, string splitToken)
        {
            StringBuilder sb = new StringBuilder();

            Analyzer an = new ICTCLASAnalyzer();
            TokenStream ts = an.TokenStream("", new StringReader(text));

            Lucene.Net.Analysis.Token token;
            bool first = true;
            while ((token = ts.Next()) != null)
            {
                // Write the separator only between terms, so nothing has to be
                // trimmed afterwards whatever the separator's length.
                if (!first)
                {
                    sb.Append(splitToken);
                }
                sb.Append(token.TermText());
                first = false;
            }

            return sb.ToString();
        }
    }
}
先验概率计算代码:
1using System;
2using System.Collections.Generic;
3using System.Text;
4
5namespace AspxOn.Search.FenLei
6{
7 /**//// <summary>
8 /// Prior probability computation.
9 /// </summary>
10 public class PriorProbability
11 {
12 private static TrainingDataManager tdm = new TrainingDataManager();
13
14 /**//// <summary>
15 /// Computes the prior probability of a classification as Nc / N.
16 /// </summary>
17 /// <param name="c">The given classification.</param>
18 /// <returns>The prior probability for the given classification.</returns>
19 public static float CaculatePc(string c)
20 {
21 float ret = 0F;
// NOTE(review): the method name after "tdm." is missing from this listing, so the next line
// does not compile; restore it from the TrainingDataManager source. Presumably it returns
// the number of training files in classification c - confirm.
22 float Nc = tdm.(c);
23 float N = tdm.GetTrainFileCount();
// NOTE(review): float division; presumably N is non-zero once training data is loaded - confirm.
24 ret = Nc / N;
25 return ret;
26 }
27 }
28}
1using System;
2using System.Collections.Generic;
3using System.Text;
4
5namespace AspxOn.Search.FenLei
6{
7 /**//// <summary>
8 /// Conditional probability computation.
9 /// </summary>
10 public class ClassConditionalProbability
11 {
12
13 private static TrainingDataManager tdm = new TrainingDataManager();
14 private static float M = 0F;
15
16 /**//// <summary>
17 /// Class-conditional probability, computed as (Nxc + 1) / (Nc + V + M).
18 /// </summary>
19 /// <param name="x">The given keyword.</param>
20 /// <param name="c">The given classification.</param>
21 /// <returns>The computed ratio.</returns>
22 public static float CaculatePxc(string x, string c)
23 {
24 float ret = 0F;
// NOTE(review): the method names after "tdm." on the next two lines are missing from this
// listing, so they do not compile; restore them from the TrainingDataManager source.
// Presumably Nxc counts training files of classification c that contain x, and Nc counts
// all training files of classification c - confirm.
25 float Nxc = tdm.(c, x);
26 float Nc = tdm.(c);
27 float V = tdm.GetTrainingClassifications().Length;
28
29 ret = (Nxc + 1) / (Nc + V + M);//weighted to avoid the extreme case of a zero result
30
31 return ret;
32 }
33 }
34}
Ⅹ 求一个网站计数器源代码,不要附带网站链接的那种,真正免费的.
这是计数器的代码,可以从网上搜一下,有不同的解决方法。
<%
' Hit counter backed by a text file: reads the current count from txtcounter.txt,
' adds one, writes it back, and emits a JavaScript document.write call with the new value.
CountFile=Server.MapPath("txtcounter.txt")
Set FileObject=Server.CreateObject("Scripting.FileSystemObject")
' Hold the application lock across the whole read-increment-write cycle so that
' concurrent requests cannot read a stale count or a file that is being rewritten.
Application.lock
Set Out=FileObject.OpenTextFile(CountFile,1,FALSE,FALSE)
counter=Out.ReadLine
Out.Close
counter= counter + 1
' Re-create the file (overwrite) using the same FileSystemObject.
Set Out=FileObject.CreateTextFile(CountFile,TRUE,FALSE)
Out.WriteLine(counter)
Out.Close
Application.unlock
Set Out=Nothing
Set FileObject=Nothing
Response.Write"document.write("&counter&")"
%>
然后在同一个文件夹下建立一个txtcounter.txt的文本文件,在里面填上数字,随便什么都行,就从填入的这个数据开始计数