nlp - How to use OpenNLP with Java? -


I want to POS-tag an English sentence and do some processing on it. I would like to use OpenNLP, and I have it installed.

When I execute the command

i:\workshop\programming\nlp\opennlp-tools-1.5.0-bin\opennlp-tools-1.5.0>java -jar opennlp-tools-1.5.0.jar postagger models\en-pos-maxent.bin < text.txt 

it gives the following output, POS-tagging the input in text.txt:

    loading pos tagger model ... done (4.009s) my_prp$ name_nn is_vbz shabab_nnp i_fw am_vbp 22_cd years_nns old._.   average: 66.7 sent/s total: 1 sent runtime: 0.015s 

I hope it is installed properly?

Now, how do I do this POS tagging from inside a Java application? I have added the opennlp-tools, jwnl, and maxent jars to the project, but how do I invoke the POS tagging?

Here's some (old) sample code I threw together, with modernized code to follow:

package opennlp;

import opennlp.tools.cmdline.PerformanceMonitor;
import opennlp.tools.cmdline.postag.POSModelLoader;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSSample;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;

/**
 * Minimal command-line example that POS-tags a hard-coded sentence with OpenNLP.
 *
 * <p>The model is read from {@code en-pos-maxent.bin}, a relative path that is
 * resolved against the process working directory. Tagged sentences are written
 * to standard output; the {@link PerformanceMonitor} is constructed on standard
 * error.
 */
public class OpenNlpTest {

    /**
     * Loads the POS model, tags every line of the sample input, and prints each
     * tagged line.
     *
     * @param args ignored
     * @throws IOException if reading or closing the line stream fails
     */
    public static void main(String[] args) throws IOException {
        POSModel model = new POSModelLoader().load(new File("en-pos-maxent.bin"));
        PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
        POSTaggerME tagger = new POSTaggerME(model);

        String input = "can me dig through opennlp's horrible documentation?";
        ObjectStream<String> lineStream =
                new PlainTextByLineStream(new StringReader(input));

        perfMon.start();
        try {
            String line;
            while ((line = lineStream.read()) != null) {
                // The tagger works on pre-split tokens, so split on whitespace first.
                String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);
                String[] tags = tagger.tag(tokens);

                // POSSample pairs each token with its tag; toString() is what gets printed.
                POSSample sample = new POSSample(tokens, tags);
                System.out.println(sample.toString());

                perfMon.incrementCounter();
            }
        } finally {
            // try/finally rather than try-with-resources: whether ObjectStream is
            // AutoCloseable depends on the OpenNLP version in use.
            lineStream.close();
        }
        perfMon.stopAndPrintFinalResult();
    }
}

the output is:

loading pos tagger model ... done (2.045s) can_md anyone_nn help_vb me_prp dig_vb through_in opennlp's_nnp horrible_jj documentation?_nn  average: 76.9 sent/s  total: 1 sent runtime: 0.013s 

This is working from the POSTaggerTool class included as part of OpenNLP. sample.getTags() is a String array that has the tag types themselves.

This requires direct file access to the training data, which is really, really lame.

An updated codebase for this is a little different (and more useful).

First, a Maven POM:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the OpenNLP POS-tagging example. Element names, attribute
     names and namespace URIs are case-sensitive. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.javachannel</groupId>
    <artifactId>opennlp-example</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.opennlp</groupId>
            <artifactId>opennlp-tools</artifactId>
            <version>1.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.testng</groupId>
            <artifactId>testng</artifactId>
            <!-- Open-ended range: any TestNG release from 6.8.21 upward. -->
            <version>[6.8.21,)</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <!-- The example code uses lambdas and streams, hence Java 8. -->
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

And here's the code, written as a test, and therefore located in ./src/test/java/org/javachannel/opennlp/example:

package org.javachannel.opennlp.example;

import opennlp.tools.cmdline.PerformanceMonitor;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSSample;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.util.stream.Stream;

/**
 * Smoke test that POS-tags a sample sentence with OpenNLP, downloading the
 * English maxent model first if it is not already present as
 * {@code en-pos-maxent.bin} (relative to the working directory).
 */
public class PosTest {

    /**
     * Copies the content served at {@code url} into {@code destination}.
     *
     * @param url         address to download from
     * @param destination file to write the downloaded bytes to
     * @throws IOException if the URL cannot be opened or the copy fails
     */
    private void download(String url, File destination) throws IOException {
        URL website = new URL(url);
        // Close both ends even when the transfer fails; previously neither the
        // channel nor the output stream was ever closed.
        try (ReadableByteChannel source = Channels.newChannel(website.openStream());
             FileOutputStream sink = new FileOutputStream(destination)) {
            // Long.MAX_VALUE as the count: copy until the source is exhausted.
            sink.getChannel().transferFrom(source, 0, Long.MAX_VALUE);
        }
    }

    /**
     * Supplies the sentences to tag.
     *
     * <p>The literal is three levels deep on purpose: the single data row has
     * one argument, and that argument is itself an {@code Object[]} of
     * sentences, matching the {@code Object[]} parameter of
     * {@link #showPos(Object[])}. Returning it as {@code Object[][]} is legal
     * because Java arrays are covariant.
     *
     * @return one data row whose only argument is the array of sentences
     */
    @DataProvider
    Object[][] getCorpusData() {
        return new Object[][][]{{{
                "can me dig through opennlp's horrible documentation?"
        }}};
    }

    /**
     * Tags every supplied sentence and prints the tagged form to standard output.
     *
     * @param input sentences to tag; each element is converted with {@code toString()}
     * @throws IOException if the model cannot be downloaded or loaded
     */
    @Test(dataProvider = "getCorpusData")
    public void showPos(Object[] input) throws IOException {
        File modelFile = new File("en-pos-maxent.bin");
        if (!modelFile.exists()) {
            System.out.println("downloading model.");
            download("http://opennlp.sourceforge.net/models-1.5/en-pos-maxent.bin", modelFile);
        }
        POSModel model = new POSModel(modelFile);
        PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
        POSTaggerME tagger = new POSTaggerME(model);

        perfMon.start();
        Stream.of(input).map(line -> {
            // The tagger works on pre-split tokens, so split on whitespace first.
            String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line.toString());
            String[] tags = tagger.tag(tokens);

            POSSample sample = new POSSample(tokens, tags);

            perfMon.incrementCounter();
            return sample.toString();
        }).forEach(System.out::println);
        perfMon.stopAndPrintFinalResult();
    }
}

This code doesn't actually test anything - it's a smoke test, if anything - but it should serve as a starting point. Another (potentially) nice thing is that it downloads the model for you if you don't have it downloaded already.


Comments

Popular posts from this blog

objective c - Change font of selected text in UITextView -

php - Accessing POST data in Facebook cavas app -

c# - Getting control value when switching a view as part of a multiview -