[LutinJ2R-commits] r74 - in lutinj2r/trunk/src: main/java/org/codelutin/j2r/types test/java/org/codelutin/j2r
Author: jcouteau Date: 2009-05-03 09:18:40 +0000 (Sun, 03 May 2009) New Revision: 74 Modified: lutinj2r/trunk/src/main/java/org/codelutin/j2r/types/RDataFrame.java lutinj2r/trunk/src/test/java/org/codelutin/j2r/DataframeTest.java Log: Add csv import/export to dataframes Modified: lutinj2r/trunk/src/main/java/org/codelutin/j2r/types/RDataFrame.java =================================================================== --- lutinj2r/trunk/src/main/java/org/codelutin/j2r/types/RDataFrame.java 2009-05-01 09:47:16 UTC (rev 73) +++ lutinj2r/trunk/src/main/java/org/codelutin/j2r/types/RDataFrame.java 2009-05-03 09:18:40 UTC (rev 74) @@ -1,5 +1,27 @@ +/* *##% Lutin Java-2-R library + * Copyright (C) 2006 - 2008 CodeLutin + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Lesser Public License for more details. + * + * You should have received a copy of the GNU General Lesser Public + * License along with this program. If not, see + * <http://www.gnu.org/licenses/lgpl-3.0.html>. ##%*/ + package org.codelutin.j2r.types; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; import java.io.Serializable; import java.util.HashMap; import java.util.Set; @@ -128,7 +150,7 @@ public void setData(Vector<Vector<? 
extends Serializable>> data, REngine engine) throws RException { this.data = data; - if(checkConsistency()==false){ + if (checkConsistency() == false) { throw new RException("A dimension is wrong on the dataframe"); } } @@ -219,7 +241,7 @@ Vector<Serializable> thisColumn = new Vector<Serializable>(); for (int j = 0; j < vectorlength; j++) { thisColumn.add((Serializable) engine.eval(this.variable + "[" - + (i + 1) + "," + (j + 1) + "]")); + + (j + 1) + "," + (i + 1) + "]")); } data.add(thisColumn); } @@ -236,8 +258,8 @@ + this.variable + ")$" + key + ")"); attributes.put(key, attribute); } - - if(checkConsistency()==false){ + + if (checkConsistency() == false) { throw new RException("A dimension is wrong on the dataframe"); } @@ -428,4 +450,276 @@ return true; } + public void exportCsv(File outputFile, boolean rowNames, boolean names) { + try { + BufferedWriter file = new BufferedWriter(new FileWriter(outputFile)); + + if (names) { + if (rowNames) { + file.write(";"); + } + for (int i = 0; i < this.names.size(); i++) { + file.write(this.names.get(i) + ";"); + } + file.newLine(); + } + + for (int i = 0; i < this.data.get(0).size(); i++) { + if (rowNames) { + file.write(this.rowNames.get(i) + ";"); + } + for (int j = 0; j < this.data.size(); j++) { + file.write(this.data.get(j).get(i) + ";"); + } + file.newLine(); + } + file.close(); + } catch (Exception e) { + e.printStackTrace(); + } + + } + + /** + * Import a dataframe form a csv file. The dataframe will contain Strings. + * Use this method if you don't know the type of data that is in the csv + * file. + * + * @param inputFile + * Csv file to import. + * @param rowNames + * Does the csv file contains names of the rows. + * @param names + * Does the csv file contain names of the columns. 
+ */ + public void importCsv(File inputFile, boolean rowNames, boolean names) { + String tmp; + Integer dataSize = 0; + try { + + BufferedReader first = new BufferedReader(new FileReader(inputFile)); + tmp = first.readLine(); + String[] splitted = tmp.split("\\;"); + if (rowNames) { + dataSize = splitted.length - 1; + } else { + dataSize = splitted.length; + } + + BufferedReader br = new BufferedReader(new FileReader(inputFile)); + if (this.data != null) { + this.data.clear(); + } + if (rowNames) { + if (this.rowNames != null) { + this.rowNames.clear(); + } + } + if (names) { + if (this.names != null) { + this.names.clear(); + } + tmp = br.readLine(); + splitted = tmp.split("\\;"); + for (int i = 1; i < splitted.length; i++) { + this.names.add(splitted[i]); + } + } + + Vector<Vector<? extends Serializable>> data = new Vector<Vector<? extends Serializable>>(); + + for (int i = 0; i < dataSize; i++) { + Vector<Serializable> vector = new Vector<Serializable>(); + data.add(vector); + } + + while ((tmp = br.readLine()) != null) { + splitted = tmp.split("\\;"); + int index = 0; + if (rowNames) { + this.rowNames.add(splitted[0]); + index = 1; + } + for (int i = 0 + index; i < splitted.length; i++) { + ((Vector<Serializable>) data.get(i - index)) + .add((Serializable) splitted[i]); + } + } + br.close(); + this.data = data; + } catch (Exception e) { + e.printStackTrace(); + } + } + + /** + * Import a dataframe form a csv file. The dataframe will contain Objects of + * the same class than importType. Use this method if you know the type of + * data that is in the csv file. + * + * @param inputFile + * Csv file to import. + * @param rowNames + * Does the csv file contains names of the rows. + * @param names + * Does the csv file contain names of the columns. + * @param importType + * Object of the class of the data imported (all the data have + * the same type). 
(Supported types : String, Double and Integer) + */ + public void importCsv(File inputFile, boolean rowNames, boolean names, + Object importType) { + String tmp; + Integer dataSize = 0; + try { + + BufferedReader first = new BufferedReader(new FileReader(inputFile)); + tmp = first.readLine(); + String[] splitted = tmp.split("\\;"); + if (rowNames) { + dataSize = splitted.length - 1; + } else { + dataSize = splitted.length; + } + + BufferedReader br = new BufferedReader(new FileReader(inputFile)); + if (this.data != null) { + this.data.clear(); + } + if (rowNames) { + if (this.rowNames != null) { + this.rowNames.clear(); + } + } + if (names) { + if (this.names != null) { + this.names.clear(); + } + tmp = br.readLine(); + splitted = tmp.split("\\;"); + for (int i = 1; i < splitted.length; i++) { + this.names.add(splitted[i]); + } + } + + Vector<Vector<? extends Serializable>> data = new Vector<Vector<? extends Serializable>>(); + + for (int i = 0; i < dataSize; i++) { + Vector<Serializable> vector = new Vector<Serializable>(); + data.add(vector); + } + + while ((tmp = br.readLine()) != null) { + splitted = tmp.split("\\;"); + int index = 0; + if (rowNames) { + this.rowNames.add(splitted[0]); + index = 1; + } + for (int i = 0 + index; i < splitted.length; i++) { + if (importType instanceof String) { + ((Vector<Serializable>) data.get(i - index)) + .add((Serializable) splitted[i]); + } else if (importType instanceof Double) { + ((Vector<Serializable>) data.get(i - index)).add(Double + .valueOf(splitted[i])); + } else if (importType instanceof Integer) { + ((Vector<Serializable>) data.get(i - index)) + .add(Integer.valueOf(splitted[i])); + } + } + } + br.close(); + this.data = data; + } catch (Exception e) { + e.printStackTrace(); + } + } + + /** + * Import a dataframe form a csv file. The dataframe will contain Objects of + * the same class than each element of importTypes (one element = one + * column). 
Use this method if you know the type of data that is in the csv + * file. + * + * @param inputFile + * Csv file to import. + * @param rowNames + * Does the csv file contains names of the rows. + * @param names + * Does the csv file contain names of the columns. + * @param importTypes + * Vector of Object of the class of the data imported (the Vector + * match the data type of the columns). (Supported types : + * String, Double and Integer) + */ + public void importCsv(File inputFile, boolean rowNames, boolean names, + Vector<Object> importTypes) { + String tmp; + Integer dataSize = 0; + try { + + BufferedReader first = new BufferedReader(new FileReader(inputFile)); + tmp = first.readLine(); + String[] splitted = tmp.split("\\;"); + if (rowNames) { + dataSize = splitted.length - 1; + } else { + dataSize = splitted.length; + } + + BufferedReader br = new BufferedReader(new FileReader(inputFile)); + if (this.data != null) { + this.data.clear(); + } + if (rowNames) { + if (this.rowNames != null) { + this.rowNames.clear(); + } + } + if (names) { + if (this.names != null) { + this.names.clear(); + } + tmp = br.readLine(); + splitted = tmp.split("\\;"); + for (int i = 1; i < splitted.length; i++) { + this.names.add(splitted[i]); + } + } + + Vector<Vector<? extends Serializable>> data = new Vector<Vector<? 
extends Serializable>>(); + + for (int i = 0; i < dataSize; i++) { + Vector<Serializable> vector = new Vector<Serializable>(); + data.add(vector); + } + + while ((tmp = br.readLine()) != null) { + splitted = tmp.split("\\;"); + int index = 0; + if (rowNames) { + this.rowNames.add(splitted[0]); + index = 1; + } + for (int i = 0 + index; i < splitted.length; i++) { + if (importTypes.get(i - index) instanceof String) { + ((Vector<Serializable>) data.get(i - index)) + .add((Serializable) splitted[i]); + } else if (importTypes.get(i - index) instanceof Double) { + ((Vector<Serializable>) data.get(i - index)).add(Double + .valueOf(splitted[i])); + } else if (importTypes.get(i - index) instanceof Integer) { + ((Vector<Serializable>) data.get(i - index)) + .add(Integer.valueOf(splitted[i])); + } + } + } + br.close(); + this.data = data; + } catch (Exception e) { + e.printStackTrace(); + } + } + } Modified: lutinj2r/trunk/src/test/java/org/codelutin/j2r/DataframeTest.java =================================================================== --- lutinj2r/trunk/src/test/java/org/codelutin/j2r/DataframeTest.java 2009-05-01 09:47:16 UTC (rev 73) +++ lutinj2r/trunk/src/test/java/org/codelutin/j2r/DataframeTest.java 2009-05-03 09:18:40 UTC (rev 74) @@ -17,6 +17,7 @@ package org.codelutin.j2r; +import java.io.File; import java.io.Serializable; import java.util.HashMap; import java.util.Vector; @@ -166,15 +167,22 @@ dataframe2.getFrom("test", engine); //Test data - Assert.assertEquals(new Double(3.0), (Double) engine.eval("test[1,1]")); - Assert.assertEquals(new Double(4.5), (Double) engine.eval("test[2,1]")); Assert - .assertEquals(new Double(0.01), (Double) engine - .eval("test[3,1]")); - Assert.assertEquals(new Double(1.0), (Double) engine.eval("test[1,2]")); - Assert.assertEquals(new Double(5555555555555555555555.0), - (Double) engine.eval("test[2,2]")); - Assert.assertEquals(new Double(3.0), (Double) engine.eval("test[3,2]")); + .assertEquals(new Double(3.0), 
dataframe2.getData().get(0).get( + 0)); + Assert + .assertEquals(new Double(4.5), dataframe2.getData().get(0).get( + 1)); + Assert.assertEquals(new Double(0.01), dataframe2.getData().get(0) + .get(2)); + Assert + .assertEquals(new Double(1.0), dataframe2.getData().get(1).get( + 0)); + Assert.assertEquals(new Double(5555555555555555555555.0), dataframe2 + .getData().get(1).get(1)); + Assert + .assertEquals(new Double(3.0), dataframe2.getData().get(1).get( + 2)); //Test names Assert.assertEquals("column1", dataframe2.getNames().get(0)); Assert.assertEquals("column2", dataframe2.getNames().get(1)); @@ -285,4 +293,168 @@ } } + @Test + public void testImportExportCsv() throws Exception { + + //Test import with same type for each column. + + Vector<String> names = new Vector<String>(); + names.add("column1"); + names.add("column2"); + + Vector<String> rowNames = new Vector<String>(); + rowNames.add("row 1"); + rowNames.add("row 2"); + rowNames.add("row 3"); + + Vector<Double> column1 = new Vector<Double>(); + column1.add(3.0); + column1.add(4.5); + column1.add(0.01); + + Vector<Double> column2 = new Vector<Double>(); + column2.add(1.0); + column2.add(5555555555555555555555.0); + column2.add(3.0); + + Vector<Vector<? extends Serializable>> data = new Vector<Vector<? 
extends Serializable>>(); + data.add(column1); + data.add(column2); + + RDataFrame testDataFrame = new RDataFrame(); + try { + testDataFrame = new RDataFrame(names, rowNames, data, engine, + "test"); + } catch (RException eee) { + Assert.fail(); + } + testDataFrame.exportCsv(new File("/tmp/test.csv"), true, true); + + RDataFrame dataframe2 = new RDataFrame(); + dataframe2.importCsv(new File("/tmp/test.csv"), true, true, new Double( + 3.0)); + + //Test data + Assert + .assertEquals(new Double(3.0), dataframe2.getData().get(0).get( + 0)); + Assert + .assertEquals(new Double(4.5), dataframe2.getData().get(0).get( + 1)); + Assert.assertEquals(new Double(0.01), dataframe2.getData().get(0) + .get(2)); + Assert + .assertEquals(new Double(1.0), dataframe2.getData().get(1).get( + 0)); + Assert.assertEquals(new Double(5555555555555555555555.0), dataframe2 + .getData().get(1).get(1)); + Assert + .assertEquals(new Double(3.0), dataframe2.getData().get(1).get( + 2)); + //Test names + Assert.assertEquals("column1", dataframe2.getNames().get(0)); + Assert.assertEquals("column2", dataframe2.getNames().get(1)); + //Test row names + Assert.assertEquals("row 1", dataframe2.getRowNames().get(0)); + Assert.assertEquals("row 2", dataframe2.getRowNames().get(1)); + Assert.assertEquals("row 3", dataframe2.getRowNames().get(2)); + + //Test import with different types for each column + + names = new Vector<String>(); + names.add("column1"); + names.add("column3"); + names.add("column4"); + + rowNames = new Vector<String>(); + rowNames.add("row 1"); + rowNames.add("row 2"); + rowNames.add("row 3"); + + column1 = new Vector<Double>(); + column1.add(3.0); + column1.add(4.5); + column1.add(0.01); + + Vector<Integer> column3 = new Vector<Integer>(); + column3.add(1); + column3.add(5); + column3.add(3); + + Vector<String> column4 = new Vector<String>(); + column4.add("bla"); + column4.add("blabla"); + column4.add("blablabla"); + + data = new Vector<Vector<? 
extends Serializable>>(); + data.add(column1); + data.add(column3); + data.add(column4); + + Vector<Object> types = new Vector<Object>(); + types.add(new Double(4.0)); + types.add(new Integer(4)); + types.add(new String()); + + testDataFrame = new RDataFrame(); + try { + testDataFrame = new RDataFrame(names, rowNames, data, engine, + "test"); + } catch (RException eee) { + Assert.fail(); + } + testDataFrame.exportCsv(new File("/tmp/test.csv"), true, true); + + RDataFrame dataframe3 = new RDataFrame(); + dataframe3.importCsv(new File("/tmp/test.csv"), true, true, types); + + //Test data + Assert + .assertEquals(new Double(3.0), dataframe3.getData().get(0).get( + 0)); + Assert + .assertEquals(new Double(4.5), dataframe3.getData().get(0).get( + 1)); + Assert.assertEquals(new Double(0.01), dataframe3.getData().get(0) + .get(2)); + Assert.assertEquals(new Integer(1), dataframe3.getData().get(1).get(0)); + Assert.assertEquals(new Integer(5), dataframe3.getData().get(1).get(1)); + Assert.assertEquals(new Integer(3), dataframe3.getData().get(1).get(2)); + Assert.assertEquals("bla", dataframe3.getData().get(2).get(0)); + Assert.assertEquals("blabla", dataframe3.getData().get(2).get(1)); + Assert.assertEquals("blablabla", dataframe3.getData().get(2).get(2)); + //Test names + Assert.assertEquals("column1", dataframe3.getNames().get(0)); + Assert.assertEquals("column3", dataframe3.getNames().get(1)); + Assert.assertEquals("column4", dataframe3.getNames().get(2)); + //Test row names + Assert.assertEquals("row 1", dataframe3.getRowNames().get(0)); + Assert.assertEquals("row 2", dataframe3.getRowNames().get(1)); + Assert.assertEquals("row 3", dataframe3.getRowNames().get(2)); + + //Test import without precision on type + + RDataFrame dataframe4 = new RDataFrame(); + dataframe4.importCsv(new File("/tmp/test.csv"), true, true); + + //Test data + Assert.assertEquals("3.0", dataframe4.getData().get(0).get(0)); + Assert.assertEquals("4.5", dataframe4.getData().get(0).get(1)); + 
Assert.assertEquals("0.01", dataframe4.getData().get(0).get(2)); + Assert.assertEquals("1", dataframe4.getData().get(1).get(0)); + Assert.assertEquals("5", dataframe4.getData().get(1).get(1)); + Assert.assertEquals("3", dataframe4.getData().get(1).get(2)); + Assert.assertEquals("bla", dataframe4.getData().get(2).get(0)); + Assert.assertEquals("blabla", dataframe4.getData().get(2).get(1)); + Assert.assertEquals("blablabla", dataframe4.getData().get(2).get(2)); + //Test names + Assert.assertEquals("column1", dataframe4.getNames().get(0)); + Assert.assertEquals("column3", dataframe4.getNames().get(1)); + Assert.assertEquals("column4", dataframe4.getNames().get(2)); + //Test row names + Assert.assertEquals("row 1", dataframe4.getRowNames().get(0)); + Assert.assertEquals("row 2", dataframe4.getRowNames().get(1)); + Assert.assertEquals("row 3", dataframe4.getRowNames().get(2)); + } + }
participants (1)
- jcouteau@users.labs.libre-entreprise.org