Serializing Stanford Parser Objects.

Recently, I needed to serialize Stanford Parser objects to a file. Though I was familiar with the concept of serialization, I had never done it in Java before. The following is an example of how to do just that, using Stanford Parser Tree objects.

The example accomplishes the simple task of lexing and parsing a simple sentence. Instead of printing the tree right now, though, we’ll serialize it to a file for use later on.

import java.io.CharArrayReader;
import java.io.FileOutputStream;
import java.io.ObjectOutputStream;
import java.util.List;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.process.Tokenizer;
 
class Serialize {
   public static void main(String[] args) {
      LexicalizedParser lp = 
        new LexicalizedParser("englishPCFG.ser.gz");
      TreebankLanguagePack tlp =
        new PennTreebankLanguagePack();
      lp.setOptionFlags(
        new String[] { "-maxLength", "80", "-retainTmpSubcategories" });
      Tree parse = null;
 
      CharArrayReader reader = 
        new CharArrayReader("Cows eat grain.".toCharArray());
      Tokenizer<? extends HasWord> toke = 
        tlp.getTokenizerFactory().getTokenizer(reader);
      List<? extends HasWord> wordList = toke.tokenize();
 
      if (lp.parse(wordList)) {
         parse = lp.getBestParse();
      }
 
      try {
         /*
          * The following serializes the Tree parse object
          *  to the file serialized.obj.  Later on, we'll
          *  read this file in another JVM.
          */
         FileOutputStream fos = new FileOutputStream("serialized.obj");
         ObjectOutputStream oos = new ObjectOutputStream(fos);
         oos.writeObject(parse);
         oos.flush();
         oos.close();
      } catch (Exception e) {
         e.printStackTrace();
      }
   }
}

At this point we’ve created a binary file named serialized.obj, which holds the serialized form of the Stanford Parser Tree object. A separate program can now deserialize that object and continue using it as before.

import java.io.FileInputStream;
import java.io.ObjectInputStream;
import edu.stanford.nlp.trees.*;
/**
 * Reads back the Stanford Parser Tree that the Serialize example wrote to
 * serialized.obj and prints it with pennPrint().
 *
 * NOTE: Java deserialization executes code chosen by the stream's contents;
 * only read files that you created yourself.
 */
class Deserialize {
   /**
    * Runs the example.
    *
    * @param args command-line arguments (unused)
    */
   public static void main(String[] args) {
      try {
         /*
          * The following deserializes a Stanford Parser
          *  Tree object that we created previously.
          */
         // Must be the same file name that the Serialize example writes.
         FileInputStream fis = new FileInputStream("serialized.obj");
         Object restored;
         try {
            ObjectInputStream ois = new ObjectInputStream(fis);
            restored = ois.readObject();
            ois.close();
         } finally {
            // Releases the file handle even when reading fails part-way;
            // closing an already-closed stream has no effect.
            fis.close();
         }

         // instanceof is false for null, so this also covers a stored null.
         if (!(restored instanceof Tree)) {
            System.err.println("serialized.obj does not contain a Tree.");
            return;
         }
         Tree parse = (Tree) restored;
         parse.pennPrint();
      } catch (Exception e) {
         e.printStackTrace();
      }
   }
}
This entry was posted in NLP, Programming. Bookmark the permalink. Comments are closed, but you can leave a trackback: Trackback URL.