<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
	<channel>
		<title><![CDATA[Latest posts for the topic "JAVA實作Search 一問 for Lucene"]]></title>
		<link>https://forum.andowson.com/posts/list/5.page</link>
		<description><![CDATA[Latest messages posted in the topic "JAVA實作Search 一問 for Lucene"]]></description>
		<generator>JForum - http://www.jforum.net</generator>
			<item>
				<title>JAVA實作Search 一問 for Lucene</title>
				<description><![CDATA[ 我無法搜尋我要的詞彙（鼻涕），我這是有少打甚麼？為何只會搜尋到０筆資料。 
<br>
可以給個起手式嗎？ 
<br>
<br>
懇請賜教～ 
<br>
<br>
Lucene 3.0.0 API 
<br>
[url]http://www.jarvana.com/jarvana/view/org/apache/lucene/lucene-core/3.0.0/lucene-core-3.0.0-javadoc.jar!/index.html?org/apache/lucene/util/Version.html[/url] 
<br>
<br>
<br>
[code] 
<br>
[Index code] 
<br>
<br>
import java.io.File; 
<br>
import java.io.FileReader; 
<br>
import java.io.IOException; 
<br>
import java.util.Date; 
<br>
<br>
import org.apache.lucene.analysis.standard.StandardAnalyzer; 
<br>
import org.apache.lucene.document.DateTools; 
<br>
import org.apache.lucene.document.Document; 
<br>
import org.apache.lucene.document.Field; 
<br>
import org.apache.lucene.index.IndexWriter; 
<br>
import org.apache.lucene.store.Directory; 
<br>
import org.apache.lucene.store.SimpleFSDirectory; 
<br>
import org.apache.lucene.util.Version; 
<br>
<br>
public class Indexer 
<br>
{ 
<br>
 public static void main(String[] args) throws IOException 
<br>
 { 
<br>
 String Idx = "C:\\test\\Idx"; 
<br>
 String dateDir = "C:\\test\\Data"; 
<br>
 IndexWriter indexWriter = null; 
<br>
<br>
 Directory dir = new SimpleFSDirectory(new File(Idx)); 
<br>
 indexWriter = new IndexWriter(dir,new StandardAnalyzer(Version.LUCENE_30),true,IndexWriter.MaxFieldLength.UNLIMITED); 
<br>
<br>
 File[] files = new File(dateDir).listFiles(); 
<br>
<br>
 for (int i = 0; i &lt; files.length; i++) 
<br>
 { 
<br>
 Document doc = new Document(); 
<br>
 doc.add(new Field("contents", new FileReader(files[i]))); 
<br>
 doc.add(new Field("filename", files[i].getName(), Field.Store.YES, Field.Index.NOT_ANALYZED)); 
<br>
 doc.add(new Field("indexDate",DateTools.dateToString(new Date(), DateTools.Resolution.DAY),Field.Store.YES,Field.Index.NOT_ANALYZED)); 
<br>
 indexWriter.addDocument(doc); 
<br>
 } 
<br>
 System.out.println("numDocs"+indexWriter.numDocs()); 
<br>
 indexWriter.close(); 
<br>
<br>
 } 
<br>
<br>
} 
<br>
[END] 
<br>
<br>
[Searcher code] 
<br>
<br>
import java.io.File; 
<br>
import java.io.IOException; 
<br>
<br>
import org.apache.lucene.analysis.standard.StandardAnalyzer; 
<br>
import org.apache.lucene.document.Document; 
<br>
import org.apache.lucene.queryParser.ParseException; 
<br>
import org.apache.lucene.queryParser.QueryParser; 
<br>
import org.apache.lucene.search.IndexSearcher; 
<br>
import org.apache.lucene.search.Query; 
<br>
import org.apache.lucene.search.ScoreDoc; 
<br>
import org.apache.lucene.search.TopDocs; 
<br>
import org.apache.lucene.store.Directory; 
<br>
import org.apache.lucene.store.SimpleFSDirectory; 
<br>
import org.apache.lucene.util.Version; 
<br>
<br>
public class Searcher 
<br>
{ 
<br>
 public static void main(String[] args) throws IOException, ParseException 
<br>
 { 
<br>
 String Idx = "C:\\test\\Idx"; 
<br>
 Directory dir = new SimpleFSDirectory(new File(Idx)); 
<br>
 IndexSearcher indexSearch = new IndexSearcher(dir); 
<br>
<br>
 QueryParser queryParser = new QueryParser(Version.LUCENE_30, "contents", new StandardAnalyzer(Version.LUCENE_30)); 
<br>
 Query query = queryParser.parse("鼻涕"); //key Query term 
<br>
 TopDocs hits = indexSearch.search(query, 500); 
<br>
 System.out.println("找到"+hits.totalHits+"個"); 
<br>
 for (int i = 0; i &lt; hits.scoreDocs.length; i++) 
<br>
 { 
<br>
 ScoreDoc sdoc = hits.scoreDocs[i]; 
<br>
 Document doc = indexSearch.doc(sdoc.doc); 
<br>
 System.out.println(doc.get("filename")); 
<br>
 } 
<br>
 indexSearch.close(); 
<br>
 } 
<br>
} 
<br>
[END] 
<br>
[/code] 
<br>
<br>
訊息回應如下圖： 
<br>
http://img263.imageshack.us/f/bug03.jpg/ 
<br>
<br>]]></description>
				<guid isPermaLink="true">https://forum.andowson.com/posts/preList/467/961.page</guid>
				<link>https://forum.andowson.com/posts/preList/467/961.page</link>
				<pubDate><![CDATA[Wed, 19 Jan 2011 14:34:00]]> GMT</pubDate>
				<author><![CDATA[ crc2121]]></author>
			</item>
			<item>
				<title>回覆:JAVA實作Search 一問 for Lucene</title>
				<description><![CDATA[ 這個程式我測試過應該沒問題才對 
<br>
1.首先下載[url=http://ftp.stut.edu.tw/var/ftp/pub/OpenSource/apache//lucene/java/lucene-3.0.3.zip]Lucene 3.0.3[/url]，然後解壓縮，並將下列jar檔加入CLASSPATH 
<br>
lucene-3.0.3\lucene-core-3.0.3.jar 
<br>
lucene-3.0.3\contrib\analyzers\common\lucene-analyzers-3.0.3.jar 
<br>
<br>
2.接著建立C:\test目錄，並在C:\test底下再建立兩個子目錄Idx和Data 
<br>
<br>
3.接著透過Google搜尋[google]鼻涕[/google]，找到任何一篇文章後，將它的內容另存成一個文字檔，如1.txt，存到C:\test\Data目錄下。 
<br>
<br>
4.執行Indexer 
<br>
<br>
5.再執行Searcher 
<br>
<br>
<br>
<br>
<br>
<br>]]></description>
				<guid isPermaLink="true">https://forum.andowson.com/posts/preList/467/962.page</guid>
				<link>https://forum.andowson.com/posts/preList/467/962.page</link>
				<pubDate><![CDATA[Thu, 20 Jan 2011 13:38:22]]> GMT</pubDate>
				<author><![CDATA[ andowson]]></author>
			</item>
			<item>
				<title>回覆:JAVA實作Search 一問 for Lucene</title>
				<description><![CDATA[ 我知道我錯哪邊了囧rz... 
<br>
編碼的屬性是UTF-8應更改為ANSI才對囧rz...]]></description>
				<guid isPermaLink="true">https://forum.andowson.com/posts/preList/467/963.page</guid>
				<link>https://forum.andowson.com/posts/preList/467/963.page</link>
				<pubDate><![CDATA[Thu, 20 Jan 2011 15:23:16]]> GMT</pubDate>
				<author><![CDATA[ crc2121]]></author>
			</item>
			<item>
				<title>回覆:JAVA實作Search 一問 for Lucene</title>
				<description><![CDATA[ 關於您問到的「如果在搜尋裡面增加，所搜尋到的文件且呈現內文應該怎麼打？ 」 
<br>
我想可以用高亮方式來處理： 
<br>
Searcher.java: 
<br>
[code] 
<br>
import java.io.BufferedInputStream; 
<br>
import java.io.File; 
<br>
import java.io.FileInputStream; 
<br>
import java.io.IOException; 
<br>
import java.io.StringReader; 
<br>
<br>
import org.apache.lucene.analysis.TokenStream; 
<br>
import org.apache.lucene.analysis.standard.StandardAnalyzer; 
<br>
import org.apache.lucene.document.Document; 
<br>
import org.apache.lucene.queryParser.ParseException; 
<br>
import org.apache.lucene.queryParser.QueryParser; 
<br>
import org.apache.lucene.search.IndexSearcher; 
<br>
import org.apache.lucene.search.Query; 
<br>
import org.apache.lucene.search.ScoreDoc; 
<br>
import org.apache.lucene.search.TopDocs; 
<br>
import org.apache.lucene.search.highlight.Highlighter; 
<br>
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; 
<br>
import org.apache.lucene.search.highlight.QueryScorer; 
<br>
import org.apache.lucene.search.highlight.Scorer; 
<br>
import org.apache.lucene.search.highlight.SimpleHTMLFormatter; 
<br>
import org.apache.lucene.store.Directory; 
<br>
import org.apache.lucene.store.SimpleFSDirectory; 
<br>
import org.apache.lucene.util.Version; 
<br>
<br>
public class Searcher { 
<br>
 public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException { 
<br>
 String Idx = "C:\\test\\Idx"; 
<br>
 String dataDir = "C:\\test\\Data"; 
<br>
 Directory dir = new SimpleFSDirectory(new File(Idx)); 
<br>
 IndexSearcher indexSearch = new IndexSearcher(dir); 
<br>
<br>
 QueryParser queryParser = new QueryParser(Version.LUCENE_30, 
<br>
 "contents", new StandardAnalyzer(Version.LUCENE_30)); 
<br>
 Query query = queryParser.parse("鼻涕"); // key Query term 
<br>
 TopDocs hits = indexSearch.search(query, 500); 
<br>
 System.out.println("找到" + hits.totalHits + "個"); 
<br>
 for (int i = 0; i &lt; hits.scoreDocs.length; i++) { 
<br>
 ScoreDoc sdoc = hits.scoreDocs[i]; 
<br>
 Document doc = indexSearch.doc(sdoc.doc); 
<br>
 System.out.println(doc.get("filename")); 
<br>
 Scorer scorer = new QueryScorer(query); 
<br>
 SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("&lt;b&gt;&lt;font color=\"red\"&gt;", "&lt;/font&gt;&lt;/b&gt;"); 
<br>
 Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer); 
<br>
 String content = readFileAsString(dataDir+File.separator+doc.get("filename")); 
<br>
 TokenStream tokenStream = queryParser.getAnalyzer().tokenStream( 
<br>
 "contents", new StringReader(content)); 
<br>
<br>
 String fragment = highlighter.getBestFragment(tokenStream, content); 
<br>
 System.out.println(fragment != null ? fragment : content); 
<br>
<br>
 } 
<br>
 indexSearch.close(); 
<br>
 } 
<br>
 private static String readFileAsString(String filePath) throws java.io.IOException{ 
<br>
 byte[] buffer = new byte[(int) new File(filePath).length()]; 
<br>
 BufferedInputStream f = null; 
<br>
 try { 
<br>
 f = new BufferedInputStream(new FileInputStream(filePath)); 
<br>
 f.read(buffer); 
<br>
 } finally { 
<br>
 if (f != null) try { f.close(); } catch (IOException ignored) { } 
<br>
 } 
<br>
 return new String(buffer); 
<br>
 } 
<br>
} 
<br>
[/code] 
<br>
需另外加入下列jar檔到CLASSPATH: 
<br>
lucene-3.0.3\contrib\highlighter\lucene-highlighter-3.0.3.jar 
<br>
lucene-3.0.3\contrib\memory\lucene-memory-3.0.3.jar]]></description>
				<guid isPermaLink="true">https://forum.andowson.com/posts/preList/467/966.page</guid>
				<link>https://forum.andowson.com/posts/preList/467/966.page</link>
				<pubDate><![CDATA[Thu, 20 Jan 2011 19:21:53]]> GMT</pubDate>
				<author><![CDATA[ andowson]]></author>
			</item>
	</channel>
</rss>