/**
 * Copyright (C) 2007 - 2016, Jens Lehmann
 *
 * This file is part of DL-Learner.
 *
 * DL-Learner is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * DL-Learner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.dllearner.kb.manipulator;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.SortedSet;
import java.util.TreeSet;

import org.dllearner.kb.extraction.Node;
import org.dllearner.utilities.JamonMonitorLogger;
import org.dllearner.utilities.datastructures.RDFNodeTuple;

import org.apache.jena.rdf.model.Literal;
import org.apache.jena.rdf.model.impl.ResourceImpl;

/**
 * Rule that turns literal objects of tuples into URI resources.
 * <p>
 * A literal whose string form starts with {@code http://} is re-created as a
 * resource with that string as URI. Any other literal that has no datatype URI
 * and does not exceed the configured length limit is URL-encoded (UTF-8) and
 * appended to the configured namespace to form a new resource URI.
 */
public class StringToResource extends Rule{

	/** Namespace prefix for generated URIs; always ends with a slash. */
	String namespace;
	/** Maximum string length that is still converted; zero disables the limit. */
	int limit ;

	/**
	 * @param month the month
	 * @param resourceNamespace ns for the created uris; a trailing "/" is appended if missing
	 * @param limit does not convert strings that are longer than a specific value, zero means convert all
	 */
	public StringToResource(Months month, String resourceNamespace, int limit) {
		super(month);
		this.namespace = resourceNamespace.endsWith("/") ? resourceNamespace : resourceNamespace + "/";
		this.limit = limit;
	}

	/**
	 * Applies the conversion to every tuple and returns the tuples that are kept.
	 * <p>
	 * Tuples whose object is not a literal (URI resources and blank nodes) are
	 * kept unchanged. Literal objects that start with {@code http://} but match
	 * the hard-coded exclusion prefix, or for which
	 * {@code tuple.bPartContains(" ")} is true, are dropped from the result.
	 * Note that the object of a converted tuple is replaced in place, i.e. the
	 * tuple instances of the input set are modified.
	 *
	 * @param subject the node the tuples belong to (not used by this rule)
	 * @param tuples the tuples to process
	 * @return a new sorted set with the retained (and possibly modified) tuples
	 */
	@Override
	public SortedSet<RDFNodeTuple> applyRule(Node subject, SortedSet<RDFNodeTuple> tuples){
		SortedSet<RDFNodeTuple> keep = new TreeSet<>();
		for (RDFNodeTuple tuple : tuples) {

			// Only literals are candidates. The former check (!isURIResource())
			// also let blank nodes through, which then failed on the Literal cast.
			if (tuple.b.isLiteral()) {
				String value = tuple.b.toString();

				// typed literals (e.g. numbers) are never turned into namespace URIs
				// NOTE(review): with RDF 1.1 semantics Jena may report xsd:string
				// instead of null for plain literals - confirm this check still
				// lets plain strings through with the Jena version in use.
				boolean replace = ((Literal) tuple.b).getDatatypeURI() == null;

				// do nothing if limit is exceeded
				if (limit != 0 && value.length() > limit) {
					replace = false;
				}

				if (value.startsWith("http://")) {
					// NOTE(review): the prefix below looks like mis-encoded text;
					// it is kept exactly as found in the source.
					if (value.startsWith("http://ru.wikipedia.org/wiki/áõÃ")
							|| tuple.bPartContains(" ")) {
						// drop this tuple entirely
						continue;
					}
					// the literal already is a URI, use it as such
					tuple.b = new ResourceImpl(value);
					replace = false;
				}

				if (replace) {
					tuple.b = new ResourceImpl(namespace + encode(value));
					JamonMonitorLogger.increaseCount(StringToResource.class, "convertedToURI");
				}
			}

			keep.add(tuple);
		}
		return keep;
	}

	/**
	 * URL-encodes the given string using UTF-8.
	 *
	 * @throws IllegalStateException if the UTF-8 encoding is reported as unsupported
	 */
	private static String encode(String value) {
		try {
			return URLEncoder.encode(value, "UTF-8");
		} catch (UnsupportedEncodingException e) {
			// Previously the JVM was terminated via System.exit(0) here;
			// fail loudly instead and let the caller decide.
			throw new IllegalStateException("UTF-8 encoding not available for: " + value, e);
		}
	}

	/** Increases the "replacedObjects" counter of this rule in the Jamon monitor. */
	@Override
	public void logJamon(){
		JamonMonitorLogger.increaseCount(StringToResource.class, "replacedObjects");
	}

}