/**
 * Copyright (C) 2007 - 2016, Jens Lehmann
 *
 * This file is part of DL-Learner.
 *
 * DL-Learner is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * DL-Learner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.dllearner.kb.manipulator;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.SortedSet;
import java.util.TreeSet;

import org.dllearner.kb.extraction.Node;
import org.dllearner.utilities.JamonMonitorLogger;
import org.dllearner.utilities.datastructures.RDFNodeTuple;
import org.dllearner.utilities.owl.OWLVocabulary;

import org.apache.jena.rdf.model.Literal;
import org.apache.jena.rdf.model.impl.ResourceImpl;

/**
 * Rule that, for every tuple whose object is an untyped literal, adds an
 * additional {@code rdf:type} tuple pointing to a class URI generated from the
 * URL-encoded literal value. Literals that look like {@code http://} URIs are
 * turned into resources instead; tuples whose object already is a URI resource
 * are passed through untouched.
 */
public class AddAllStringsAsClasses extends Rule{
	/** Namespace used for generated class URIs; always ends with "/". */
	String namespace;

	/**
	 * @param month the month; forwarded unchanged to the {@code Rule} constructor
	 * @param resourceNamespace ns for the created uris; a trailing "/" is
	 *        appended when it is missing
	 */
	public AddAllStringsAsClasses(Months month, String resourceNamespace) {
		super(month);
		this.namespace = resourceNamespace.endsWith("/")
				? resourceNamespace
				: resourceNamespace + "/";
	}

	/**
	 * Applies the rule to all tuples of a subject.
	 *
	 * <ul>
	 * <li>Object is a URI resource: the tuple is kept as is.</li>
	 * <li>Object's string form starts with {@code http://}: the tuple is dropped
	 * when it matches the hard-coded filter, otherwise its object is replaced by
	 * a resource with that URI and the tuple is kept.</li>
	 * <li>Object is a literal without datatype URI: an extra
	 * {@code (rdf:type, generated class)} tuple is added and the original tuple
	 * is kept as well.</li>
	 * <li>Anything else (typed literals, non-literal nodes): the tuple is kept
	 * unchanged.</li>
	 * </ul>
	 *
	 * @param subject the node the tuples belong to (not read by this rule)
	 * @param tuples the tuples to process
	 * @return a new sorted set with the resulting tuples
	 */
	@Override
	public SortedSet<RDFNodeTuple> applyRule(Node subject, SortedSet<RDFNodeTuple> tuples){
		SortedSet<RDFNodeTuple> keep = new TreeSet<>();
		for (RDFNodeTuple tuple : tuples) {
			if (tuple.b.isURIResource()) {
				keep.add(tuple);
				continue;
			}

			String value = tuple.b.toString();
			if (value.startsWith("http://")) {
				// NOTE(review): the literal below looks like mis-encoded Cyrillic text;
				// it is kept byte-for-byte because it is matched at runtime.
				// bPartContains is presumably a substring test on the object part - verify.
				if (value.startsWith("http://ru.wikipedia.org/wiki/áõÃ")
						|| tuple.bPartContains(" ")) {
					// filter
					continue;
				}
				// NOTE(review): this mutates the caller's tuple in place, as before.
				tuple.b = new ResourceImpl(value);
			} else if (isUntypedLiteral(tuple)) {
				keep.add(new RDFNodeTuple(
						new ResourceImpl(OWLVocabulary.RDF_TYPE),
						new ResourceImpl(toClassUri(value))));
			}
			keep.add(tuple);
		}
		return keep;
	}

	/**
	 * Tells whether the object of the tuple is a literal that carries no datatype
	 * URI. Non-literal nodes (e.g. blank nodes) yield {@code false} instead of
	 * failing on the cast.
	 */
	private static boolean isUntypedLiteral(RDFNodeTuple tuple) {
		// NOTE(review): newer Jena versions may report a datatype URI even for plain
		// string literals, in which case this is never true - confirm against the
		// Jena version in use.
		return tuple.b.isLiteral() && ((Literal) tuple.b).getDatatypeURI() == null;
	}

	/**
	 * Builds the class URI for a literal value: the value is URL-encoded as UTF-8,
	 * prefixed with the namespace, every "%" is replaced by "_" and a "c" is put
	 * in front of the whole string.
	 */
	private String toClassUri(String literalValue) {
		String encoded;
		try {
			encoded = URLEncoder.encode(literalValue, "UTF-8");
		} catch (UnsupportedEncodingException e) {
			// Fail loudly with the cause instead of terminating the whole JVM.
			throw new IllegalStateException("UTF-8 encoding is not supported", e);
		}
		// NOTE(review): the "c" ends up in front of the namespace ("chttp://...") and
		// "%" is also replaced inside the namespace; kept to preserve generated URIs.
		return "c" + (namespace + encoded).replace("%", "_");
	}

	/** Increases the Jamon counter "replacedObjects" for this rule class. */
	@Override
	public void logJamon(){
		JamonMonitorLogger.increaseCount(AddAllStringsAsClasses.class, "replacedObjects");
	}

}