001    /*
002     Copyright (c) 2012, Regents of the University of Colorado
003     All rights reserved.
004    
005     Redistribution and use in source and binary forms, with or without modification, 
006     are permitted provided that the following conditions are met:
007    
008     * Redistributions of source code must retain the above copyright notice, this 
009        list of conditions and the following disclaimer.
010       
011     * Redistributions in binary form must reproduce the above copyright notice, 
012        this list of conditions and the following disclaimer in the documentation 
013        and/or other materials provided with the distribution.
014       
015     * Neither the name of the University of Colorado nor the names of its 
016        contributors may be used to endorse or promote products derived from this 
017        software without specific prior written permission.
018    
019     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020     ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
022     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
023     ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
024     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
025     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
026     ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
027     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
028     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029     */
030    package edu.ucdenver.ccp.medline.parser;
031    
032    import java.io.EOFException;
033    import java.io.FileNotFoundException;
034    import java.io.IOException;
035    import java.io.InputStream;
036    import java.io.ObjectInputStream;
037    import java.util.Iterator;
038    import java.util.NoSuchElementException;
039    
040    import com.thoughtworks.xstream.XStream;
041    import com.thoughtworks.xstream.io.xml.StaxDriver;
042    import com.thoughtworks.xstream.mapper.CannotResolveClassException;
043    
044    import edu.ucdenver.ccp.common.file.CharacterEncoding;
045    import edu.ucdenver.ccp.common.file.FileReaderUtil;
046    
047    /**
048     * @author Center for Computational Pharmacology, UC Denver; ccpsupport@ucdenver.edu
049     * 
050     */
051    public class PubmedXmlDeserializer implements Iterator<PubmedArticleBase> {
052    
053            private PubmedArticleBase nextArticle = null;
054            private final ObjectInputStream in;
055    
056            public PubmedXmlDeserializer(InputStream pubmedXmlFileStream) throws FileNotFoundException, IOException {
057                    XStream xstream = new XStream(new StaxDriver());
058                    xstream.processAnnotations(PubmedArticleSet.class);
059                    xstream.processAnnotations(PubmedArticle.class);
060                    xstream.processAnnotations(PubmedBookArticle.class);
061                    xstream.processAnnotations(BookDocument.class);
062                    xstream.processAnnotations(MedlineCitation.class);
063    
064                    in = xstream.createObjectInputStream(FileReaderUtil.initBufferedReader(pubmedXmlFileStream,
065                                    CharacterEncoding.UTF_8));
066            }
067    
068            /*
069             * (non-Javadoc)
070             * 
071             * @see java.util.Iterator#hasNext()
072             */
073            public boolean hasNext() {
074                    if (nextArticle == null) {
075                            try {
076                                    PubmedArticleBase article = (PubmedArticleBase) in.readObject();
077                                    nextArticle = article;
078                            } catch (EOFException e) {
079                                    return false;
080                            } catch (IOException e) {
081                                    throw new RuntimeException(e);
082                            } catch (ClassNotFoundException e) {
083                                    throw new RuntimeException(e);
084                            } catch (CannotResolveClassException e) {
085                                    throw new RuntimeException("The knowledge model for PubMed XML is missing a class: " + e.getMessage()
086                                                    + "\n This class must be added in order for the parse to proceed.");
087                            }
088                    }
089                    return true;
090            }
091    
092            /*
093             * (non-Javadoc)
094             * 
095             * @see java.util.Iterator#next()
096             */
097            public PubmedArticleBase next() {
098                    if (!hasNext())
099                            throw new NoSuchElementException();
100    
101                    PubmedArticleBase articleToReturn = nextArticle;
102                    nextArticle = null;
103                    return articleToReturn;
104            }
105    
106            /*
107             * (non-Javadoc)
108             * 
109             * @see java.util.Iterator#remove()
110             */
111            public void remove() {
112                    throw new UnsupportedOperationException("The remove() method is not supported for this iterator.");
113            }
114    
115    }