A simple example of Java-based full-text search with Lucene - JSP教程,Java技巧及代码-IDC资讯中心

索引源代码：
package lucene;

/**
* title: 
* description: 
* copyright: copyright (c) 2003
* company: 
* @author shirley
* @version 1.0
*/

import org.apache.lucene.index.*;
import org.apache.lucene.analysis.*;
import java.io.*;
import org.apache.lucene.document.*;

/**
 * Demo program that adds a fixed list of files to a Lucene index.
 *
 * <p>Each file becomes one document with two fields: "path" (the file name
 * as given) and "body" (the file content, read through a Reader).
 */
public class indexfiles {
  /**
   * Indexes the hard-coded file list into the hard-coded index directory.
   *
   * <p>Intended usage: indexfiles [index output directory] [files to index] ...
   * The command-line arguments are currently ignored; the values below are
   * used instead.
   *
   * @param arg command-line arguments (unused)
   * @throws Exception if a file cannot be read or the index cannot be written
   */
  public static void main(String[] arg) throws Exception {
    // The array is built with an initializer so that its length always
    // matches the number of entries (index directory + files to index).
    String[] args = {
      // Directory in which the index data is kept.
      System.getProperty("java.io.tmpdir", "tmp") + System.getProperty("file.separator") + "index-1",
      // Files to be indexed.
      "e:\\appwork\\lucene\\rfc2047.txt",
      "e:\\appwork\\cyberoffice\\co\\sheldon java mail.htm",
      "e:\\appwork\\lucene\\englishtest.doc",
      "e:\\appwork\\cyberoffice\\co\\xls1.xls",
      "e:\\appwork\\cyberoffice\\co\\ppt1.ppt"
    };

    String indexPath = args[0];
    // Build an index writer with the chosen analyzer.
    // NOTE(review): in Lucene 1.x the third constructor argument is "create";
    // false presumably appends to an index that must already exist at
    // indexPath -- confirm this is intended for a first run.
    IndexWriter writer = new IndexWriter(indexPath, new SimpleAnalyzer(), false);
    try {
      for (int i = 1; i < args.length; i++) {
        System.out.println("indexing file " + args[i]);
        InputStream is = new FileInputStream(args[i]);
        try {
          // Build a Document with two fields:
          //   "path" - stored but not indexed;
          //   "body" - full-text indexed from the file content.
          // NOTE(review): a Reader-valued Field.Text is presumably tokenized
          // and indexed but not stored in Lucene 1.x -- verify against the
          // Lucene version in use.
          Document doc = new Document();
          doc.add(Field.UnIndexed("path", args[i]));
          doc.add(Field.Text("body", new InputStreamReader(is)));
          // Write the document to the index.
          writer.addDocument(doc);
        } finally {
          // Release the file handle even if indexing this file fails.
          is.close();
        }
      }
    } finally {
      // Close the index writer even if indexing stops part-way through.
      writer.close();
    }
  }
}

搜索源代码：
package lucene;

/**
* title: 
* description: 
* copyright: copyright (c) 2003
* company: 
* @author shirley
* @version 1.0
*/

import org.apache.lucene.search.*;
import org.apache.lucene.queryparser.*;
import org.apache.lucene.analysis.*;

public class search {
  public static void main(string[] arg) throws exception {
    string[] args = new string[2];
    //索引后存放索引信息的路径
    args[0] = system.getproperty("java.io.tmpdir", "tmp") + system.getproperty("file.separator") + "index-1";
    //搜索關鍵字
    args[1] = "sending";

string indexpath = args[0];
string querystring = args[1];

//指向索引目录的搜索器
 searcher searcher = new indexsearcher(indexpath);
 //查询解析器：使用和索引同样的语言分析器
 query query = queryparser.parse(querystring, "body", new simpleanalyzer());
 //搜索结果使用hits存储
 hits hits = searcher.search(query);
 //通过hits可以访问到相应字段的数据和查询的匹配度
 for (int i=0; i<hits.length(); i++) {
 system.out.println(hits.doc(i).get("path") + "; score: " + hits.score(i));
 };
 }
}

注：目前程序只支持英文索引，可以过滤文件类型为.txt .doc .htm .xls .ppt

中文索引及其它类型文件的索引正在研究中……

A simple example of Java-based full-text search with Lucene - JSP教程,Java技巧及代码

相关推荐

热门标签

热门文章

分类目录