Objet : devel-adl
Archives de la liste
- From: thomas AT lolut.utbm.info
- To: devel AT agendadulibre.org
- Subject: [Devel] r142 - scripts
- Date: Sun, 15 Jan 2006 22:45:15 +0100 (CET)
- List-archive: <http://lolut.utbm.info/pipermail/devel>
- List-id: Developpement de l'Agenda du Libre <devel.agendadulibre.org>
Author: thomas
Date: 2006-01-15 22:45:14 +0100 (Sun, 15 Jan 2006)
New Revision: 142
Added:
scripts/extract-gulls.py
Log:
Ajout du script utilisé pour extraire la liste des LUGs du site de l'AFUL.
Added: scripts/extract-gulls.py
===================================================================
--- scripts/extract-gulls.py 2006-01-15 21:44:30 UTC (rev 141)
+++ scripts/extract-gulls.py 2006-01-15 21:45:14 UTC (rev 142)
@@ -0,0 +1,106 @@
+#!/usr/bin/python
+# -*- coding: iso-8859-1 -*-
+
+import HTMLParser, urllib, urlparse
+import re
+
# Maps every French department code (a string such as '67', '2A' or '971')
# to a numeric region id.  The table is written region by region and then
# flattened into the department -> region dictionary the parser looks up.
# NOTE(review): the ids are presumably the primary keys of the Agenda du
# Libre region table -- confirm against the database before editing.
depts2region = dict(
    (dept, region)
    for region, depts in (
        (1, ('67', '68')),
        (2, ('24', '33', '40', '47', '64')),
        (3, ('03', '15', '43', '63')),
        (4, ('14', '50', '61')),
        (5, ('21', '58', '71', '89')),
        (6, ('22', '29', '35', '56')),
        (7, ('18', '28', '36', '37', '41', '45')),
        (8, ('08', '10', '51', '52')),
        (9, ('2A', '2B')),
        (10, ('25', '39', '70', '90')),
        (11, ('27', '76')),
        (12, ('75', '77', '78', '91', '92', '93', '94', '95')),
        (13, ('11', '30', '34', '48', '66')),
        (14, ('19', '23', '87')),
        (15, ('54', '55', '57', '88')),
        (16, ('09', '12', '31', '32', '46', '65', '81', '82')),
        (17, ('59', '62')),
        (18, ('44', '49', '53', '72', '85')),
        (19, ('02', '60', '80')),
        (20, ('16', '17', '79', '86')),
        (21, ('04', '05', '06', '13', '83', '84')),
        (22, ('01', '07', '26', '38', '42', '69', '73', '74')),
        (23, ('971',)),
        (24, ('973',)),
        (25, ('972',)),
        (26, ('974',)),
    )
    for dept in depts
)
+
class GULLParser(HTMLParser.HTMLParser):
    """Scrape the French LUG entries of the AFUL list and print SQL inserts.

    The parser is a small state machine driven by the tags it is fed:

    * the text of an ``<h3>`` heading becomes the current country, and every
      tag outside the "France" section is ignored;
    * an ``<h4>`` whose ``id`` looks like ``fr-NN`` selects department ``NN``;
    * each ``<li>`` seen once a department is selected is one LUG: the first
      ``href`` found on an ``<a>`` inside it is the URL, and the first text
      chunk found inside a link is the name.

    When such a ``</li>`` closes and the department is a key of the
    module-level ``depts2region`` mapping, one ``insert into lugs ...``
    statement is printed on standard output.
    """

    # Compiled once instead of on every <h4>.  At least one digit is
    # required, and the optional A/B suffix lets the Corsican codes
    # '2A'/'2B' (both keys of depts2region) match too.
    _DEPARTMENT_ID = re.compile(r'^fr-([0-9]+[AB]?)$')

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.seen = {}
        # 0 means "no department heading seen yet"; otherwise the code
        # string captured from the <h4> id.
        self.currentDepartement = 0
        self.inLug = False
        self.inLugLink = False
        self.currentLugLink = None
        self.currentLugName = None
        self.inCountry = False
        self.currentCountry = None

    @staticmethod
    def _sql_quote(value):
        """Return ``value`` escaped for use inside a '...' SQL literal.

        Backslashes are doubled first (MySQL treats them as an escape
        character by default), then single quotes are doubled.
        """
        return value.replace("\\", "\\\\").replace("'", "''")

    def handle_starttag(self, tag, attributs):
        """Track country/department headings and the start of LUG items.

        ``attributs`` is the list of ``(name, value)`` pairs handed over by
        HTMLParser; ``value`` is None for an attribute written without one.
        """
        if tag == 'h3':
            self.inCountry = True
            return

        if self.currentCountry != "France":
            return

        if tag == 'h4':
            for nom, valeur in attributs:
                if nom == 'id' and valeur is not None:
                    departement = self._DEPARTMENT_ID.search(valeur)
                    if departement is not None:
                        self.currentDepartement = departement.group(1)
        if tag == 'li' and self.currentDepartement != 0:
            self.inLug = True
        if self.inLug and tag == 'a':
            self.inLugLink = True
            for nom, valeur in attributs:
                if (nom == 'href' and valeur is not None
                        and self.currentLugLink is None):
                    # Site-specific fix-ups for two known entries.
                    valeur = valeur.replace("http://g3l.org",
                                            "http://www.g3l.org")
                    # "\xe0" is Latin-1 "a grave", matching the file's
                    # iso-8859-1 coding; the archived copy had lost it.
                    # NOTE(review): restored from a garbled byte -- confirm
                    # against the committed file.
                    valeur = valeur.replace("mailto:gmull \xe0 laposte.net",
                                            "http://gmull.tuxfamily.org/")
                    self.currentLugLink = valeur

    def handle_endtag(self, tag):
        """Close headings, links and LUG items; print the insert on </li>."""
        if tag == 'h3':
            self.inCountry = False
            return

        if self.currentCountry != "France":
            return

        if tag == 'li' and self.currentDepartement != 0:
            self.inLug = False
            if (self.currentDepartement in depts2region
                    and self.currentLugName is not None
                    and self.currentLugLink is not None):
                # Name and URL come straight from the scraped page, so they
                # are escaped before being placed inside SQL string literals.
                print("insert into lugs (region, department, name, url) values ('"
                      + str(depts2region[self.currentDepartement]) + "', '"
                      + self.currentDepartement + "', '"
                      + self._sql_quote(self.currentLugName) + "', '"
                      + self._sql_quote(self.currentLugLink) + "');")
            # Always start the next item from a clean slate so a skipped
            # entry cannot leak its link or name into the following one.
            self.currentLugLink = None
            self.currentLugName = None

        if self.inLug and tag == 'a':
            self.inLugLink = False

    def handle_data(self, data):
        """Record the country heading text and the first text of a LUG link."""
        if self.inCountry:
            self.currentCountry = data

        if self.inLug and self.inLugLink and self.currentLugName is None:
            # Hand-written clean-ups for a few overly long names.  "\xe9" is
            # Latin-1 "e acute", matching the file's iso-8859-1 coding.
            data = data.replace("\n", " ")
            data = data.replace(
                "/ D\xe9partement T\xe9l\xe9communications", "")
            data = data.replace(" (ou GIF)", "")
            data = data.replace(
                " (Groupe des Utilisateurs de Logiciels Libres"
                " L\xe9o LAGRANGE)", "")
            self.currentLugName = data
+
# Emit the schema first so the generated SQL rebuilds the table from scratch.
# "if exists" keeps a first run (no table yet) from failing on the drop.
print("drop table if exists lugs;")
# ENGINE= replaces the TYPE= spelling, which current MySQL servers reject.
print("""CREATE TABLE lugs (
  id int(11) NOT NULL auto_increment,
  region int(11) NOT NULL default '0',
  department int(11) NOT NULL default '0',
  name varchar(255) NOT NULL default '',
  url varchar(255) NOT NULL default '',
  PRIMARY KEY (id)
) ENGINE=MyISAM AUTO_INCREMENT=2 ;""")


# Stream the AFUL list through the parser in 8 KiB chunks; the parser prints
# one insert statement per LUG as it goes.
p = GULLParser()
f = urllib.urlopen('http://www.aful.org/gul/liste')
try:
    while True:
        donnees = f.read(8192)
        if not donnees:
            break
        p.feed(donnees)
    p.close()
finally:
    # Release the HTTP connection even if parsing fails half-way.
    f.close()
Property changes on: scripts/extract-gulls.py
___________________________________________________________________
Name: svn:executable
+ *
- [Devel] r142 - scripts, thomas, 15/01/2006
Archives gérées par MHonArc 2.6.16.