paper progress
2011-10-31 13:36:08

1. crawl web data 20111031

2. refine crawled data 20111101

▼ more
work list
2011-10-28 09:49:45

졸업까지 졸업논문1, 학점1

수료까지 과제1, 기술역량1, 수료심사1

~12:00 스터디, 다운로드

2:00~ 압축 풀기, Index merging 시작

3:00~ 인터넷 연결 수리, 기계학습 공부 시작

▼ more
11/9
2011-10-26 16:46:52

▼ more
index merger
2011-10-26 14:11:05

package kr.ac.bike.se3.lucenemerge;

/*This program is free software: you can redistribute it and/or modify

it under the terms of the GNU General Public License as published by

the Free Software Foundation, either version 3 of the License, or

(at your option) any later version.

This program is distributed in the hope that it will be useful,

but WITHOUT ANY WARRANTY; without even the implied warranty of

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

GNU General Public License for more details.

You should have received a copy of the GNU General Public License

along with this program. If not, see <http://www.gnu.org/licenses/>.

Author: Asterios Katsifodimos (http://www.asteriosk.gr)

*/

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Command-line utility that merges several existing Lucene indexes into one
 * new, optimized index.
 *
 * <p>Every sub-directory of the "indexes" directory is opened as a Lucene
 * index and added to a freshly created index in the output directory.
 */
public class IndexMerger {

    /** Directory scanned for source indexes when no arguments are given. */
    private static final String DEFAULT_INDEXES_DIR = "d:\\indices";

    /** Prefix of the output directory when no arguments are given; a timestamp is appended. */
    private static final String DEFAULT_OUTPUT_PREFIX = "d:\\output";

    /**
     * Merges all indexes found under the source directory into a single index.
     *
     * <p>Usage: {@code java -jar IndexMerger.jar existing_indexes_dir merged_index_dir}
     * <ul>
     *   <li>{@code existing_indexes_dir}: a directory whose sub-directories
     *       (e.g. {@code indexes/index1}, {@code indexes/index2}) are the
     *       indexes to merge;</li>
     *   <li>{@code merged_index_dir}: the directory where the merged index
     *       is stored.</li>
     * </ul>
     * When exactly two arguments are not supplied, the built-in defaults are
     * used: {@code d:\indices} as the source and {@code d:\output} followed by
     * the current time in milliseconds as the destination.
     *
     * @param args optional source and destination directories, as described above
     */
    public static void main(String[] args) {
        boolean pathsGiven = args.length == 2;
        File indexesDir = new File(pathsGiven ? args[0] : DEFAULT_INDEXES_DIR);
        File mergedDir = new File(
                pathsGiven ? args[1] : DEFAULT_OUTPUT_PREFIX + System.currentTimeMillis());

        // List the source directory exactly once, and before anything is
        // created on disk: listFiles() returns null when the path is missing
        // or is not a directory.
        File[] entries = indexesDir.listFiles();
        if (entries == null) {
            System.err.println("Cannot list indexes directory: " + indexesDir.getAbsolutePath());
            System.exit(1);
            return;
        }

        if (!mergedDir.mkdirs() && !mergedDir.isDirectory()) {
            System.err.println("Cannot create output directory: " + mergedDir.getAbsolutePath());
            System.exit(1);
            return;
        }

        long startMillis = System.currentTimeMillis();
        List<Directory> sources = new ArrayList<Directory>();
        Directory target = null;

        try {
            target = FSDirectory.open(mergedDir);
            IndexWriterConfig config = new IndexWriterConfig(
                    Version.LUCENE_31, new StandardAnalyzer(Version.LUCENE_31));
            IndexWriter writer = new IndexWriter(target, config);
            try {
                for (File entry : entries) {
                    // Only sub-directories can be indexes; a stray regular
                    // file must not abort the whole merge.
                    if (!entry.isDirectory()) {
                        System.out.println("Skipping (not a directory): " + entry.getName());
                        continue;
                    }
                    System.out.println("Adding: " + entry.getName());
                    sources.add(FSDirectory.open(entry));
                }

                System.out.print("Merging added indexes...");
                writer.addIndexes(sources.toArray(new Directory[sources.size()]));
                System.out.println("done");

                System.out.print("Optimizing index...");
                writer.optimize();
            } finally {
                // Always close the writer so its write lock on the output
                // directory is released even when the merge fails.
                writer.close();
            }
            System.out.println("done");

            long elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1000;
            System.out.println("It took: " + elapsedSeconds + "\"");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            for (Directory source : sources) {
                closeQuietly(source);
            }
            closeQuietly(target);
        }
    }

    /** Closes a directory handle, ignoring failures; used only during cleanup. */
    private static void closeQuietly(Directory directory) {
        if (directory == null) {
            return;
        }
        try {
            directory.close();
        } catch (IOException ignored) {
            // Best-effort cleanup: the merge outcome has already been reported.
        }
    }
}

▼ more