--- core/src/dualist/pipes/DocumentPipe.java.orig	2012-02-11 05:07:28.000000000 +0900
+++ core/src/dualist/pipes/DocumentPipe.java	2012-02-22 22:44:45.000000000 +0900
@@ -13,6 +13,8 @@
 import cc.mallet.pipe.TokenSequenceRemoveStopwords;
 import cc.mallet.types.Instance;
 
+import dualist.pipes.SimpleMecabPipe;
+
 public class DocumentPipe extends Pipe {
 
     private Pipe myPipe = new SerialPipes(new Pipe[] {
@@ -24,6 +26,9 @@
             new CharSequenceReplace(Pattern.compile("&(.*?);"), ""),
             new CharSequenceReplace(Pattern.compile("[0-9]+"), "00"),
             new CharSequenceLowercase(),
+            (System.getProperty("dualist.lang") != null &&
+             System.getProperty("dualist.lang").equals("ja")) ?
+            new SimpleMecabPipe() :
 //            new CharSequence2TokenSequence(CharSequenceLexer.LEX_WORD_CLASSES),
             new CharSequence2TokenSequence("[\\p{L}\\p{Mn}]+"),
             new TokenSequenceRemoveStopwords(),
--- build.xml.orig	2012-03-08 23:07:56.000000000 +0900
+++ build.xml	2012-03-09 09:32:14.000000000 +0900
@@ -26,7 +26,7 @@
     <target name="compile">
         <mkdir dir="${classes.dir}"/>
         <!-- <javac srcdir="${src.dir}" destdir="${classes.dir}" classpathref="classpath"/> -->
-        <javac debug="true" debuglevel="lines,vars,source" srcdir="${src.dir}" destdir="${classes.dir}" classpathref="classpath"/>
+        <javac debug="true" debuglevel="lines,vars,source" srcdir="${src.dir}" destdir="${classes.dir}" classpathref="classpath" encoding="UTF-8"/>
     </target>
 
     <target name="jar" depends="compile">