#!/usr/bin/env python3
"""List data-only Debian packages that have a Debian-imported directory entry.

Reads the RDF export ``directory.xml`` and collects every subject that was
submitted by ``Debian_import``, keyed by the last path component of its
import source link.  It then loads the local debtags database and prints,
in sorted order, each package tagged ``role::data`` or ``role::app-data``
that also appears in the export, together with its public directory URL.
"""

import rdflib
from debian import debtags

# Prefix stripped from the page URIs found in the export before they are
# re-rooted on the public site (17 characters, same as the wiki: namespace
# host used in the query below).
# NOTE(review): assumes every swivt:page value starts with this prefix -- confirm.
LOCAL_PREFIX = "http://localhost/"
PUBLIC_PREFIX = "https://directory.fsf.org/"

g = rdflib.Graph()
g.parse("directory.xml")

qres = g.query("""
    PREFIX swivt: <http://semantic-mediawiki.org/swivt/1.0#>
    PREFIX wiki: <http://localhost/wiki/Special:URIResolver/>

    SELECT DISTINCT ?label ?source ?page
    WHERE {
        ?subject a swivt:Subject .
        ?subject rdfs:label ?label .
        ?subject wiki:Property-3ASubmitted_by wiki:Debian_import .
        ?subject wiki:Property-3AImport_source_link ?source .
        ?subject swivt:page ?page
    }
""")

# Map package name -> (label, public page URL).  The package name is taken
# to be the last "/"-separated component of the import source link.
packages = {}
for label, source, page in qres:
    package = source.split("/")[-1]
    page = PUBLIC_PREFIX + page[len(LOCAL_PREFIX):]
    packages[package] = (label, page)

db = debtags.DB()
# Use a context manager so the tag database file is closed after loading.
with open("/var/lib/debtags/package-tags", "r") as tag_file:
    db.read(tag_file)

data_packages = db.packages_of_tag("role::data").union(
    db.packages_of_tag("role::app-data")
)

# Report only the packages present in both the debtags selection and the
# directory export.
for package in sorted(data_packages.intersection(packages)):
    _label, page = packages[package]
    print(package, page)