001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.kb.manipulator;
020
021import java.net.URLEncoder;
022import java.util.SortedSet;
023import java.util.TreeSet;
024
025import org.dllearner.kb.extraction.Node;
026import org.dllearner.utilities.JamonMonitorLogger;
027import org.dllearner.utilities.datastructures.RDFNodeTuple;
028
029import org.apache.jena.rdf.model.Literal;
030import org.apache.jena.rdf.model.impl.ResourceImpl;
031
032public class StringToResource extends Rule{
033        
034        String namespace;
035        int limit ;
036
037        /**
038         * @param month the month
039         * @param resourceNamespace ns for the created uris
040         * @param limit does not convert strings that are longer than a specific value, zero means convert all
041         */
042        public StringToResource(Months month, String resourceNamespace, int limit) { 
043                super(month);
044                String slash = "";
045                if(!resourceNamespace.endsWith("/")) {
046                        slash="/";
047                }
048                
049                this.namespace = resourceNamespace+slash;
050                this.limit = limit;
051        }
052        
053        
054        @Override
055        public  SortedSet<RDFNodeTuple> applyRule(Node subject, SortedSet<RDFNodeTuple> tuples){
056                SortedSet<RDFNodeTuple> keep = new TreeSet<>();
057                for (RDFNodeTuple tuple : tuples) {
058                        
059                        // do nothing if the object contains http://
060                        if(!tuple.b.isURIResource()){
061                                boolean replace = true;
062                                
063                                //check for numbers 
064                                if(((Literal) tuple.b).getDatatypeURI()!= null){
065                                                replace = false; 
066                                                        
067                                }
068                                // do nothing if limit is exceeded
069                                if(limit != 0 && tuple.b.toString().length()>limit){
070                                        replace = false;
071                                }
072                                
073                                if(tuple.b.toString().startsWith("http://")){
074                                        //System.out.println(tuple.b.toString());
075                                        if(
076                                                tuple.b.toString().startsWith("http://ru.wikipedia.org/wiki/СеÑ") ||
077                                                tuple.bPartContains(" ")
078                                                        
079                                        ){
080                                                continue;
081                                                
082                                        }
083                                        tuple.b = new ResourceImpl(tuple.b.toString());
084                                        replace= false;
085                                }
086                                
087                                
088                                if (replace){
089                                
090                                        String tmp = tuple.b.toString();
091                                        try{
092                                                //encode
093                                        tmp = URLEncoder.encode(tmp, "UTF-8");
094                                        }catch (Exception e) {
095                                                e.printStackTrace();
096                                                System.exit(0);
097                                        }
098                                        
099                                        tuple.b = new ResourceImpl(namespace+tmp);
100                                        JamonMonitorLogger.increaseCount(StringToResource.class, "convertedToURI");
101                                }else {
102                                        // do nothing
103                                }
104                        }
105                        
106                        keep.add(tuple);
107                }
108                return  keep;
109        }
110
111        @Override
112        public void logJamon(){
113                JamonMonitorLogger.increaseCount(StringToResource.class, "replacedObjects");
114        }
115        
116        
117        
118}