/**
 * Copyright (C) 2007-2011, Jens Lehmann
 *
 * This file is part of DL-Learner.
 *
 * DL-Learner is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * DL-Learner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package org.dllearner.scripts.analyse;

import org.dllearner.scripts.analyse.CountInstances.Count;
import org.dllearner.utilities.Files;

import java.io.File;
import java.util.*;

/**
 * Command-line script that, for one input file, serialises its hierarchy in
 * both directions, counts instances per class and writes the raw, expanded and
 * purged results to files named after the input file.
 */
public class ScriptDoAll {

    /** Property URI relating a class to its superclass. */
    public static String subclassof = "http://www.w3.org/2000/01/rdf-schema#subClassOf";
    /** Property URI relating a SKOS concept to a broader concept. */
    public static String broader = "http://www.w3.org/2004/02/skos/core#broader";

    /** Property URI used as the "type" relation for SKOS categories. */
    public static String subject = "http://www.w3.org/2004/02/skos/core#subject";
    /** Property URI used as the "type" relation for classes. */
    public static String rdftype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";

    /** Namespace filter for DBpedia categories. */
    public static String catns = "http://dbpedia.org/resource/Category:";
    /** Namespace filter for the DBpedia ontology. */
    public static String dbns = "http://dbpedia.org/ontology/";
    /** Namespace filter for YAGO classes. */
    public static String yagons = "http://dbpedia.org/class/yago/";

    /**
     * Shared counter used by {@link #doIt} and as the enclosing instance for
     * new {@link Count} objects.
     * NOTE(review): the first argument looks like a SPARQL endpoint URL and the
     * second like a list of default graphs - confirm against CountInstances.
     */
    static CountInstances c = new CountInstances("http://db0.aksw.org:8893/sparql", Arrays.asList("http://dbpedia.org/ontology"));

    /**
     * Runs the analysis for the DBpedia ontology file. The YAGO and category
     * runs are kept as commented-out alternatives.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        String dbpediaFile = "dbpedia_3.5.1.owl";
        @SuppressWarnings("unused")
        String yagoFile = "yagoclasses_links.nt";
        @SuppressWarnings("unused")
        String categoryFile = "skoscategories_en.nt";

        doIt(dbpediaFile, "RDF/XML", subclassof, rdftype, dbns, false);
//        doIt(yagoFile, "N-TRIPLES", subclassof, rdftype, yagons, false);
//        doIt(categoryFile, "N-TRIPLES", broader, subject, catns, true);

    }

    /**
     * Processes one input file and writes five output files next to it:
     * {@code .sub.ser}, {@code .super.ser}, {@code .count},
     * {@code .expanded.count} and {@code .purged.ser}.
     *
     * @param file     path of the input file; also the prefix of every output file
     * @param format   serialisation format name passed to {@link Hierarchy}
     * @param relation property URI that defines the hierarchy
     * @param type     property URI passed to {@code countInstances}
     * @param nsFilter namespace passed to {@code countInstances}
     * @param noExpand flag passed through to {@link Hierarchy}
     */
    public static void doIt(String file, String format, String relation, String type, String nsFilter, boolean noExpand) {

        Map<String, SortedSet<String>> dbdown = new Hierarchy().getHierarchyDown(file, format, relation, noExpand);
        Files.writeObjectToFile(dbdown, new File(file + ".sub.ser"));

        // The upward hierarchy is only serialised; it is not bound to a local so
        // that no reference to it is held during the remaining steps.
        Files.writeObjectToFile(new Hierarchy().getHierarchyUp(file, format, relation, noExpand), new File(file + ".super.ser"));

        List<Count> countdb = c.countInstances(type, nsFilter);

        toFile(countdb, file + ".count");

        toFile(expand(countdb, dbdown), file + ".expanded.count");

        Files.writeObjectToFile(purge(countdb, dbdown), new File(file + ".purged.ser"));
    }

    /**
     * Returns a copy of {@code hierarchy} in which every value set keeps only
     * the entries that have a non-null count in {@code count}. All keys are
     * retained, possibly mapped to an empty set.
     *
     * @param count     counts whose URIs decide which entries survive
     * @param hierarchy map from a class to its related classes
     * @return a new map; the input map is not modified
     */
    public static Map<String, SortedSet<String>> purge(List<Count> count, Map<String, SortedSet<String>> hierarchy) {
        Map<String, Integer> map = toMap(count);
        Map<String, SortedSet<String>> ret = new HashMap<>();
        for (Map.Entry<String, SortedSet<String>> entry : hierarchy.entrySet()) {
            SortedSet<String> kept = new TreeSet<>();
            for (String s : entry.getValue()) {
                if (map.get(s) != null) {
                    kept.add(s);
                }
            }
            ret.put(entry.getKey(), kept);
        }
        return ret;
    }

    /**
     * Computes, for every class that appears in {@code count} or as a key of
     * {@code hierarchy}, its own count plus the counts of all classes listed
     * for it in {@code hierarchy} (excluding itself).
     *
     * @param count     direct counts per class
     * @param hierarchy map from a class to its related classes
     * @return the accumulated counts, in the iteration order of a
     *         {@code TreeSet<Count>}
     */
    public static List<Count> expand(List<Count> count, Map<String, SortedSet<String>> hierarchy) {
        Map<String, Integer> classNrOfInstances = toMap(count);
        SortedSet<Count> ret = new TreeSet<>();
        SortedSet<String> allClasses = new TreeSet<>();
        allClasses.addAll(classNrOfInstances.keySet());
        allClasses.addAll(hierarchy.keySet());

        for (String key : allClasses) {
            Integer own = classNrOfInstances.get(key);
            int now = (own != null) ? own.intValue() : 0;

            // A class without a hierarchy entry contributes only its own count.
            SortedSet<String> expanded = hierarchy.get(key);
            if (expanded != null) {
                for (String rel : expanded) {
                    if (rel.equals(key)) {
                        continue;
                    }
                    Integer add = classNrOfInstances.get(rel);
                    if (add != null) {
                        now += add;
                    }
                }
            }
            ret.add(c.new Count(key, now));
        }
        return new ArrayList<>(ret);
    }

    /**
     * Indexes a list of counts by URI. If a URI occurs more than once, the
     * last occurrence wins.
     *
     * @param c counts to index
     * @return a new map from URI to count
     */
    public static Map<String, Integer> toMap(List<Count> c) {
        Map<String, Integer> ret = new HashMap<>();
        for (Count count : c) {
            ret.put(count.uri, count.count);
        }
        return ret;
    }

    /**
     * Writes one line per count, using {@code Count.toString()}, to the given file.
     *
     * @param c        counts to write
     * @param filename path of the file to create
     */
    public static void toFile(List<Count> c, String filename) {
        // StringBuilder: the buffer never leaves this method, so no synchronisation is needed.
        StringBuilder buf = new StringBuilder();
        for (Count count : c) {
            buf.append(count.toString()).append("\n");
        }

        Files.createFile(new File(filename), buf.toString());
    }

}