Skip to content

Instantly share code, notes, and snippets.

@2efPer
Created November 21, 2017 07:53
Show Gist options
  • Select an option

  • Save 2efPer/5bac5bbe21e3a40d30984dd6e02a1373 to your computer and use it in GitHub Desktop.

Select an option

Save 2efPer/5bac5bbe21e3a40d30984dd6e02a1373 to your computer and use it in GitHub Desktop.
IK,Usage
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.io.*;
/**
 * Command-line utility that segments text with IK Analyzer.
 *
 * <p>Reads the file {@code todofile} line by line. Each line is split on the
 * {@code __SOB__} marker; lines that split into exactly two or three parts are
 * tokenized (the first part is ignored, the remaining parts are the text) and
 * written to the file {@code result} as
 * {@code <input line number>__SOB__token1|token2|...} terminated by CRLF.
 * Lines with any other number of parts are skipped, but still counted.
 *
 * <p>Both files are opened with {@link FileReader}/{@link FileWriter}, i.e. with
 * the platform default charset.
 */
public class IkAnalyserUsageTest {

    /** Marker separating the fields of an input line and of an output line. */
    private static final String FIELD_SEPARATOR = "__SOB__";

    /** Delimiter placed between tokens in the output. */
    private static final String TOKEN_SEPARATOR = "|";

    /**
     * Entry point: tokenizes {@code todofile} into {@code result}.
     *
     * <p>Any failure (opening a file, reading, analyzing, writing) aborts the
     * run; the error is reported on standard error and both files are closed.
     *
     * @param args ignored
     */
    public static void main (String[] args) {
        // NOTE(review): the 'true' flag presumably selects IK's "smart" segmentation mode — confirm.
        IKAnalyzer analyzer = new IKAnalyzer(true);
        long counter = 0;
        // try-with-resources closes both files on every path, including a failed open
        // of the second file; the original dereferenced null streams in that case.
        try (BufferedReader br = new BufferedReader(new FileReader(new File("todofile")));
             BufferedWriter bw = new BufferedWriter(new FileWriter(new File("result")))) {
            String s;
            while ((s = br.readLine()) != null) {
                // Counts every input line, including skipped ones, so the number
                // written to the output is the line's position in the input file.
                counter++;
                String todoContent = extractContent(stripEscapes(s));
                if (todoContent == null) {
                    continue;
                }
                bw.write(counter + FIELD_SEPARATOR + tokenize(analyzer, todoContent) + "\r\n");
                // Flush per line so results written so far survive a later failure.
                bw.flush();
            }
        } catch (Exception e) {
            System.err.println("Segmentation aborted at input line " + counter);
            e.printStackTrace();
        }
    }

    /**
     * Removes literal backslash escapes from a line: the two-character sequences
     * {@code \r} and {@code \n} first, then every remaining backslash.
     *
     * <p>{@link String#replace} returns a new string, so the result has to be
     * used; the original code discarded it and therefore stripped nothing.
     * Removing every backslash also covers the doubled-backslash case.
     *
     * @param line raw input line
     * @return the line without those sequences
     */
    private static String stripEscapes(String line) {
        return line.replace("\\r", "").replace("\\n", "").replace("\\", "");
    }

    /**
     * Extracts the text to tokenize from an input line.
     *
     * @param line input line whose fields are separated by {@code __SOB__}
     * @return the second field, or the second and third fields joined by a
     *         space; {@code null} when the line does not split into exactly
     *         two or three fields
     */
    private static String extractContent(String line) {
        String[] content = line.split(FIELD_SEPARATOR);
        if (content.length == 2) {
            return content[1];
        }
        if (content.length == 3) {
            return content[1] + " " + content[2];
        }
        return null;
    }

    /**
     * Runs the analyzer over {@code text} and joins the emitted terms with
     * {@code |}.
     *
     * <p>The separator is inserted between tokens rather than appended and then
     * chopped off, so an input that yields no tokens produces an empty string
     * instead of corrupting the preceding {@code __SOB__} marker.
     *
     * @param analyzer analyzer used to create the token stream
     * @param text text to segment
     * @return the tokens joined by {@code |}; empty if there are none
     * @throws IOException if the token stream fails
     */
    private static String tokenize(IKAnalyzer analyzer, String text) throws IOException {
        StringBuilder sb = new StringBuilder();
        // The stream is closed even when incrementToken() throws.
        try (TokenStream ts = analyzer.tokenStream("", new StringReader(text))) {
            CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                if (sb.length() > 0) {
                    sb.append(TOKEN_SEPARATOR);
                }
                sb.append(term.toString());
            }
            // Lucene's consumer workflow: reset, incrementToken..., end, close.
            ts.end();
        }
        return sb.toString();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment