/* Copyright (c) 2005-2006 Ricebridge. All Rights Reserved. * * This file is available under the terms and conditions of the * Ricebridge "Open Source API" policy; Ricebridge grants use of this * copyrighted work under the terms of a BSD-style license only. See * http://www.opensource.org/licenses/bsd-license.php for more * information. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * - Neither the name of the Ricebridge nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

import com.ricebridge.xmlman.XmlManager;
import com.ricebridge.xmlman.RecordSpec;
import com.ricebridge.xmlman.XmlSpec;
import com.ricebridge.xmlman.RecordListenerSupport;
import com.ricebridge.xmlman.BadRecord;

import com.ricebridge.csvman.CsvManager;
import com.ricebridge.csvman.CsvSpec;

import java.util.*;
import java.io.*;


/** Convert an XML file into a CSV file, as a continuous data stream.
 *  In this example, the entire XML file is NOT loaded into memory.
 *  Instead, each data record extracted from the XML file is saved
 *  immediately as a line of the CSV file. This means that you can
 *  process very large multi-gigabyte files without running out of memory.
 *  The data formats for this example are the same as for the
 *  Xml2CSv.java example. You should review that example first as this one
 *  is based on it.
 */
public class StreamingXml2Csv {

  // public methods

  /** Run the example on fixed file names, resolved against the current
   *  working directory: GetSearchResults.xml is converted into
   *  GetSearchResults.csv.
   *  @param args command line arguments (not used)
   */
  public static void main( String[] args ) {
    File xml = new File("GetSearchResults.xml");
    File csv = new File("GetSearchResults.csv");
    convert( xml, csv );
  }


  /** Convert the search result items of an XML file into CSV lines,
   *  one record at a time.
   *  The CSV file is written by a {@link StreamingRecordListener}; this
   *  method makes sure the listener's output stream is closed again,
   *  whether or not loading completes normally.
   *  @param pXmlFile the XML file to read
   *  @param pCsvFile the CSV file to create; existing content is replaced
   */
  public static void convert( File pXmlFile, File pCsvFile ) {

    // Create the XML Manager used to load the data.
    XmlManager xmlman  = new XmlManager();
    XmlSpec    xmlspec = xmlman.getXmlSpec();
    xmlspec.addNamespace("e","urn:ebay:apis:eBLBaseComponents");

    // One path per CSV column. The header line written by
    // StreamingRecordListener lists its column names in this same order.
    String[] fieldpaths = new String[] {
      "e:Item/e:ItemID",
      "e:Item/e:ListingDetails/e:StartTime",
      "e:Item/e:ListingDetails/e:EndTime",
      "e:Item/e:ListingDetails/e:ViewItemURL",
      "e:Item/e:SellingStatus/e:BidCount",
      "e:Item/e:SellingStatus/e:CurrentPrice/@currencyID",
      "e:Item/e:SellingStatus/e:CurrentPrice",
      "e:Item/e:Site",
      "e:Item/e:Title",
      "e:Item/e:Country",
    };

    RecordSpec itemspec
      = new RecordSpec("e:GetSearchResultsResponse/e:SearchResultItemArray/e:SearchResultItem",
                       fieldpaths );

    // This is the object that enables us to stream the data.
    StreamingRecordListener streamer = new StreamingRecordListener( pCsvFile );

    try {
      // Load the XML file as a stream, the CSV data is saved in the
      // StreamingRecordListener object.
      xmlman.load( pXmlFile, itemspec, streamer );
    }
    finally {
      // After a normal load the stream has already been closed by
      // endProcessImpl and this call does nothing. It only matters when
      // loading was abandoned before the end-of-process callback ran.
      try {
        streamer.closeCsvStream();
      }
      catch( IOException ignored ) {
        // Best-effort cleanup on the failure path: a problem closing the
        // file must not hide the exception that aborted the load.
      }
    }
  }



  /** Stream XML records to a CSV file.
   *  The output stream is opened in {@link #startProcessImpl}, written to
   *  by every {@link #handleRecordImpl} call, and closed in
   *  {@link #endProcessImpl}.
   */
  public static class StreamingRecordListener extends RecordListenerSupport {

    // private instance

    /** The CSV Manager used for output. */
    private CsvManager iCsvManager = null;

    /** The destination CSV file. */
    private File iCsvFile = null;

    /** The CSV file output stream; kept open between records,
     *  null while no stream is open.
     */
    private FileOutputStream iCsvOS = null;


    // public methods

    /** Create the stream handler.
     *  The file is not opened until loading starts.
     *  @param pCsvFile the CSV file to create
     */
    public StreamingRecordListener( File pCsvFile ) {
      iCsvFile = pCsvFile;
    }


    /** Called by XML Manager once at start of loading.
     *  Configures the CSV Manager, opens the CSV file and writes the
     *  header line.
     *  @throws Exception if the file cannot be opened or the header
     *          line cannot be saved
     */
    protected void startProcessImpl() throws Exception {
      iCsvManager = new CsvManager();

      CsvSpec csvspec = iCsvManager.getCsvSpec();
      csvspec.setEncoding( "UTF-8" );
      // The same stream is handed to save() once per record, so the
      // CSV Manager must leave it open (presumably what this switch
      // controls) and push each line out as it is written.
      csvspec.setCloseOutputStream( false );
      csvspec.setFlushEachLine( true );

      // Start from an empty file and keep the stream open. Append mode
      // is not needed for repeated writes to one open stream, and it
      // would add a second header and duplicate records to the output
      // of a previous run.
      iCsvOS = new FileOutputStream( iCsvFile );

      // Column names, in the order of the field paths used by convert().
      ArrayList headers = new ArrayList();
      headers.add( new String[] {"ItemID","StartTime","EndTime","ViewItemURL","BidCount","Currency","Price","Site","Title","Country"} );
      iCsvManager.save( iCsvOS, headers );
    }


    /** Called by XML Manager for each data record extracted from the XML file.
     *  The record is saved to the CSV file straight away.
     *  @param pRecord       record data fields
     *  @param pRecordNumber number of records found so far
     *  @return always null, to indicate that the record was fine
     *  @throws Exception if the record cannot be saved
     */
    protected BadRecord handleRecordImpl( String[] pRecord, long pRecordNumber ) throws Exception {

      // just create a list with one data record
      // we could buffer the records and only output them
      // every 100 records say, but we'll keep this example simple
      ArrayList row = new ArrayList();
      row.add( pRecord );
      iCsvManager.save( iCsvOS, row );

      // we are not doing any semantic validation,
      // so return a null BadRecord to indicate that
      // everything was fine
      return null;
    }


    /** Called by XML Manager once at end of loading.
     *  Closes the CSV file; does nothing if it was never opened.
     *  @throws Exception if closing the file fails
     */
    protected void endProcessImpl() throws Exception {
      closeCsvStream();
    }


    // private methods

    /** Close the CSV output stream if it is open.
     *  Safe to call repeatedly: the field is cleared before the stream
     *  is closed, so later calls find nothing to do.
     *  @throws IOException if closing the stream fails
     */
    private void closeCsvStream() throws IOException {
      FileOutputStream os = iCsvOS;
      iCsvOS = null;
      if( null != os ) {
        os.close();
      }
    }
  }

}