001 /*
002 Copyright (c) 2012, Regents of the University of Colorado
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without modification,
006 are permitted provided that the following conditions are met:
007
008 * Redistributions of source code must retain the above copyright notice, this
009 list of conditions and the following disclaimer.
010
011 * Redistributions in binary form must reproduce the above copyright notice,
012 this list of conditions and the following disclaimer in the documentation
013 and/or other materials provided with the distribution.
014
015 * Neither the name of the University of Colorado nor the names of its
016 contributors may be used to endorse or promote products derived from this
017 software without specific prior written permission.
018
019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
023 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030 package edu.ucdenver.ccp.medline.parser;
031
032 import java.io.EOFException;
033 import java.io.FileNotFoundException;
034 import java.io.IOException;
035 import java.io.InputStream;
036 import java.io.ObjectInputStream;
037 import java.util.Iterator;
038 import java.util.NoSuchElementException;
039
040 import com.thoughtworks.xstream.XStream;
041 import com.thoughtworks.xstream.io.xml.StaxDriver;
042 import com.thoughtworks.xstream.mapper.CannotResolveClassException;
043
044 import edu.ucdenver.ccp.common.file.CharacterEncoding;
045 import edu.ucdenver.ccp.common.file.FileReaderUtil;
046
047 /**
048 * @author Center for Computational Pharmacology, UC Denver; ccpsupport@ucdenver.edu
049 *
050 */
051 public class PubmedXmlDeserializer implements Iterator<PubmedArticleBase> {
052
053 private PubmedArticleBase nextArticle = null;
054 private final ObjectInputStream in;
055
056 public PubmedXmlDeserializer(InputStream pubmedXmlFileStream) throws FileNotFoundException, IOException {
057 XStream xstream = new XStream(new StaxDriver());
058 xstream.processAnnotations(PubmedArticleSet.class);
059 xstream.processAnnotations(PubmedArticle.class);
060 xstream.processAnnotations(PubmedBookArticle.class);
061 xstream.processAnnotations(BookDocument.class);
062 xstream.processAnnotations(MedlineCitation.class);
063
064 in = xstream.createObjectInputStream(FileReaderUtil.initBufferedReader(pubmedXmlFileStream,
065 CharacterEncoding.UTF_8));
066 }
067
068 /*
069 * (non-Javadoc)
070 *
071 * @see java.util.Iterator#hasNext()
072 */
073 public boolean hasNext() {
074 if (nextArticle == null) {
075 try {
076 PubmedArticleBase article = (PubmedArticleBase) in.readObject();
077 nextArticle = article;
078 } catch (EOFException e) {
079 return false;
080 } catch (IOException e) {
081 throw new RuntimeException(e);
082 } catch (ClassNotFoundException e) {
083 throw new RuntimeException(e);
084 } catch (CannotResolveClassException e) {
085 throw new RuntimeException("The knowledge model for PubMed XML is missing a class: " + e.getMessage()
086 + "\n This class must be added in order for the parse to proceed.");
087 }
088 }
089 return true;
090 }
091
092 /*
093 * (non-Javadoc)
094 *
095 * @see java.util.Iterator#next()
096 */
097 public PubmedArticleBase next() {
098 if (!hasNext())
099 throw new NoSuchElementException();
100
101 PubmedArticleBase articleToReturn = nextArticle;
102 nextArticle = null;
103 return articleToReturn;
104 }
105
106 /*
107 * (non-Javadoc)
108 *
109 * @see java.util.Iterator#remove()
110 */
111 public void remove() {
112 throw new UnsupportedOperationException("The remove() method is not supported for this iterator.");
113 }
114
115 }