/* Copyright (c) 2005-2006 Ricebridge. All Rights Reserved. * * This file is available under the terms and conditions of the * Ricebridge "Open Source API" policy; Ricebridge grants use of this * copyrighted work under the terms of a BSD-style license only. See * http://www.opensource.org/licenses/bsd-license.php for more * information. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * - Neither the name of the Ricebridge nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. */ import com.ricebridge.xmlman.XmlManager; import com.ricebridge.xmlman.RecordSpec; import com.ricebridge.xmlman.XmlSpec; import com.ricebridge.csvman.CsvManager; import com.ricebridge.csvman.CsvSpec; import com.ricebridge.csvman.LineListenerSupport; import com.ricebridge.csvman.BadLine; import java.util.*; import java.io.*; /** Convert a CSV file into an XML file, as a continuous data stream. * In this example, the entire CSV file is NOT loaded into memory. * Instead, each line of the CSV file is saved immediately as a stream * of XML elements. This means that you can process very large multi-gigabyte * files without running out of memory. * The data formats for this example as the same as for the * Csv2Xml.java example. You should review that example first as this one * is based on it. */ public class StreamingCsv2Xml { // public methods public static void main( String[] args ) { File csv = new File("report.csv"); File xml = new File("report.xml"); convert( csv, xml ); } public static void convert( File pCsvFile, File pXmlFile ) { // Create the CSV Manager used to load the data. CsvManager csvman = new CsvManager(); CsvSpec csvspec = csvman.getCsvSpec(); csvspec.setEncoding("UTF-8"); csvspec.setStartLine(8); String[] fieldpaths = new String[] { "/report/campaign/day/@date", "/report/campaign/@name", "/report/campaign/@group", "@text", "@match", "@status", "cpc/@min", "cpc/@max", "cpm/@max", "url", "clicks/@imp", "clicks/@count", "clicks/@ctr", "clicks/@avgcpc", "clicks/@avgcpm", "cost", "avgpos", }; RecordSpec keyspec = new RecordSpec("/report/campaign/day/keyword", fieldpaths ); // This is the object that enables us to stream the data. StreamingLineListener streamer = new StreamingLineListener( pXmlFile, keyspec ); // Load the CSV file as a stream, the XML data is saved in the // StreamingLineListener object. csvman.load( pCsvFile, streamer ); } /** Stream CSV records to an XML file. */ public static class StreamingLineListener extends LineListenerSupport { // private instance /** The XML Manager used for output. */ private XmlManager iXmlManager = null; /** The RecordSpec defined the output XML format. */ private RecordSpec iRecordSpec = null; /** The destination XML file. */ private File iXmlFile = null; /** The XML file output stream (kept open for appending). */ private FileOutputStream iXmlOS = null; // public methods /** Create the stream handler. * @param pXmlFile the XML file to create * @param pRecordSpec the XMl format */ public StreamingLineListener( File pXmlFile, RecordSpec pRecordSpec ) { iXmlFile = pXmlFile; iRecordSpec = pRecordSpec; } /** Called by CSV Manager once at start of loading. */ public void startLoadImpl() throws Exception { iXmlManager = new XmlManager( iRecordSpec ); XmlSpec xmlspec = iXmlManager.getXmlSpec(); xmlspec.setHeader( "\n" ); xmlspec.setStreamOutput( true ); // open the file for appending, and keep it open iXmlOS = new FileOutputStream( iXmlFile, true ); } /** Called by CSV Manager for each line of the CSV file. * @param pLine the data fields (missing fields are empty String, not null) * @param pNumFields actual number of fields found * @param pLineNumber number of the current line * @param pOriginalLine text of the original CSV line */ public BadLine addLineImpl( String[] pLine, int pNumFields, long pLineNumber, String pOriginalLine ) throws Exception { // only output XML if there is data and avoid the last line which contains totals if( 0 < pLine.length && !"Totals and Overall Averages:".equals(pLine[0]) ) { // just create a list with one data record // we could buffer the records and only output them // every 100 records say, but we'll keep this example simple ArrayList line = new ArrayList(); line.add( pLine ); iXmlManager.save( iXmlOS, line ); } // we are not doing any semantic validation, // so return a null BadLine to indicate that // everything was fine return null; } /** Called by CSV Manager once at end of loading. */ public void endLoadImpl() throws Exception { // outputs the final closing XML element tags iXmlManager.finishSave(); // and close the open output stream iXmlOS.close(); } } }