001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.kb.manipulator;
020
021import java.net.URLEncoder;
022import java.util.SortedSet;
023import java.util.TreeSet;
024
025import org.dllearner.kb.extraction.Node;
026import org.dllearner.utilities.JamonMonitorLogger;
027import org.dllearner.utilities.datastructures.RDFNodeTuple;
028import org.dllearner.utilities.owl.OWLVocabulary;
029
030import org.apache.jena.rdf.model.Literal;
031import org.apache.jena.rdf.model.impl.ResourceImpl;
032
033public class AddAllStringsAsClasses extends Rule{
034        String namespace;
035
036        /**
037         * @param month the month
038         * @param resourceNamespace ns for the created uris
039         */
040        public AddAllStringsAsClasses(Months month, String resourceNamespace) { 
041                super(month);
042                String slash = "";
043                if(!resourceNamespace.endsWith("/")) {
044                        slash="/";
045                }
046                
047                this.namespace = resourceNamespace+slash;
048                
049        }
050        
051        
052        @Override
053        public  SortedSet<RDFNodeTuple> applyRule(Node subject, SortedSet<RDFNodeTuple> tuples){
054                SortedSet<RDFNodeTuple> keep = new TreeSet<>();
055                for (RDFNodeTuple tuple : tuples) {
056                        //System.out.println(tuple);
057                        //System.exit(0);
058                        if(tuple.b.isURIResource()){
059                                //System.out.println("added");
060                                keep.add(tuple);
061                                continue;
062                        }
063                
064                        //RDFNode b = null;
065                        if(!tuple.b.isURIResource()){
066                                boolean replace = true;
067                                
068                                //check for numbers 
069                                if(((Literal) tuple.b).getDatatypeURI()!= null){
070                                                replace = false; 
071                                }
072                                                        
073                                //if string is an uri
074                                if(tuple.b.toString().startsWith("http://")){
075                                        //System.out.println(tuple.b.toString());
076                                        if(     tuple.b.toString().startsWith("http://ru.wikipedia.org/wiki/СеÑ") ||
077                                                tuple.bPartContains(" ")
078                                        ){      
079                                                //filter
080                                                continue; 
081                                        }
082                                        
083                                        tuple.b = new ResourceImpl(tuple.b.toString());
084                                        replace = false;
085                                }
086                                
087                                
088                                if (replace){
089                                        
090                                        String tmp = tuple.b.toString();
091                                        //System.out.println("replaced: "+tmp);
092                                        try{
093                                                //encode
094                                        tmp = URLEncoder.encode(tmp, "UTF-8");
095                                        }catch (Exception e) {
096                                                e.printStackTrace();
097                                                System.exit(0);
098                                        }
099                                                tmp = namespace+tmp;
100                                                tmp = tmp.replaceAll("%", "_");
101                                                tmp = "c"+tmp;
102                                                keep.add(new RDFNodeTuple(new ResourceImpl(OWLVocabulary.RDF_TYPE),new ResourceImpl(tmp)));
103                                        
104                                }else {
105                                        // do nothing
106                                }//end else
107                        
108                        }//end if
109                        keep.add(tuple);
110                }
111                return  keep;
112        }
113
114        @Override
115        public void logJamon(){
116                JamonMonitorLogger.increaseCount(AddAllStringsAsClasses.class, "replacedObjects");
117        }
118        
119        
120        
121}