Created
November 21, 2017 07:53
-
-
Save 2efPer/5bac5bbe21e3a40d30984dd6e02a1373 to your computer and use it in GitHub Desktop.
IK Analyzer usage example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import org.apache.lucene.analysis.TokenStream; | |
| import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; | |
| import org.wltea.analyzer.lucene.IKAnalyzer; | |
| import java.io.*; | |
| public class IkAnalyserUsageTest { | |
| public static void main (String[] args) { | |
| BufferedReader br =null; | |
| BufferedWriter bw = null; | |
| try { | |
| br = new BufferedReader(new FileReader(new File("todofile"))); | |
| bw = new BufferedWriter(new FileWriter(new File("result"))); | |
| } catch (IOException e) { | |
| e.printStackTrace(); | |
| } | |
| String s = null; | |
| IKAnalyzer analyzer =new IKAnalyzer(true); | |
| TokenStream ts= null; | |
| long counter = 0; | |
| try{ | |
| while((s=br.readLine())!=null){ | |
| counter++; | |
| s.replace("\\r",""); | |
| s.replace("\\n",""); | |
| s.replace("\\",""); | |
| s.replace("\\\\",""); | |
| String[] content = s.split("__SOB__"); | |
| String todoContent =null; | |
| if(content.length==2){ | |
| todoContent=content[1]; | |
| }else if(content.length==3){ | |
| todoContent=content[1]+" "+content[2]; | |
| }else{ | |
| continue; | |
| } | |
| StringReader reader=new StringReader(todoContent); | |
| ts = analyzer.tokenStream("", reader); | |
| CharTermAttribute term=ts.getAttribute(CharTermAttribute.class); | |
| ts.reset(); | |
| StringBuilder sb = new StringBuilder(); | |
| //TODO | |
| sb.append(counter+"__SOB__"); | |
| while(ts.incrementToken()){ | |
| sb.append(term.toString()+"|"); | |
| } | |
| bw.write(sb.toString().trim().substring(0,sb.length()-1)+"\r\n"); | |
| bw.flush(); | |
| ts.close(); | |
| } | |
| }catch (Exception e){ | |
| System.out.println("fuckyou"); | |
| e.printStackTrace(); | |
| } | |
| try { | |
| br.close(); | |
| bw.close(); | |
| } catch (IOException e) { | |
| e.printStackTrace(); | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment