python-modules/diapo-clean.py at master · deborah-powers/python-modules · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/python3.6
# -*- coding: utf-8 -*-
from functools import cmp_to_key
from fileCls import File
from fileList import FileList, FileTable
import loggerFct as log

def comparPhotoByCity (photoA, photoI):
	"""Three-way comparison of two photo rows, usable with functools.cmp_to_key.

	The rows are compared cell by cell in the priority order 1, 2, 0, 4, 3, 5.
	The first cell that differs decides the result; later cells are not read.

	Args:
		photoA: first row, an indexable sequence of mutually comparable cells.
		photoI: second row, same layout as photoA.

	Returns:
		1 if photoA sorts after photoI, -1 if it sorts before,
		0 if the six compared cells are all equal.
	"""
	for pos in (1, 2, 0, 4, 3, 5):
		if photoA[pos] > photoI[pos]: return 1
		if photoA[pos] < photoI[pos]: return -1
	# equal rows must give 0: cmp_to_key compares the result with 0,
	# and an implicit None would raise a TypeError during the sort
	return 0

def comparPhotoByDate (photoA, photoI):
	"""Three-way comparison of two photo rows, usable with functools.cmp_to_key.

	The rows are compared cell by cell in the priority order 0, 1, 2, 4, 3, 5.
	The first cell that differs decides the result; later cells are not read.

	Args:
		photoA: first row, an indexable sequence of mutually comparable cells.
		photoI: second row, same layout as photoA.

	Returns:
		1 if photoA sorts after photoI, -1 if it sorts before,
		0 if the six compared cells are all equal.
	"""
	for pos in (0, 1, 2, 4, 3, 5):
		if photoA[pos] > photoI[pos]: return 1
		if photoA[pos] < photoI[pos]: return -1
	# equal rows must give 0: cmp_to_key compares the result with 0,
	# and an implicit None would raise a TypeError during the sort
	return 0


"""
sorted (mylist, key=cmp_to_key (comparPhotoByCity))
fileA = FileTable ('b/diaporama.txt')
"""
# Module-level table shared by every function below: it is built from the
# photos-data.tsv path, loaded with read() and ordered with FileTable's own sort().
# NOTE(review): the path mixes '/' and '\\' separators - presumably resolved by
# FileTable itself; confirm against fileList.
fileA = FileTable ('s/portfolio\\diaporama\\photos-data.tsv')
fileA.read()
fileA.sort()

def cleanFsimple():
	"""Turn the loosely formatted text of the module-level fileA into tab-separated lines.

	Every step goes through fileA.replace and runs in this order:
	1. runs of three or more spaces are reduced to two, then double spaces become tabs;
	2. a space before a year starting with 201 or 202 becomes a tab;
	3. the noise words are passed alone to fileA.replace (presumably erased -
	   confirm the default replacement of FileTable.replace);
	4. a space following '-' and two digits becomes a tab;
	5. spaces around a known city name or abbreviation become tabs;
	6. the link words de / du / au / aux get their surrounding spaces back;
	7. city abbreviations are expanded, duplicated 'paris' cells are merged;
	8. the lines of fileA.text are sorted.
	"""
	while "   " in fileA.text: fileA.replace ("   ","  ")
	fileA.replace ("  ","\t")
	fileA.replace (' 201', '\t201')
	fileA.replace (' 202', '\t202')
	eraseWords =( ' (1)', ' (2)', ' (3)', '-s-no-gm' )
	for word in eraseWords: fileA.replace (word)
	nombres = '0123456789'
	for nba in nombres:
		for nbo in nombres: fileA.replace ('-'+ nba + nbo +" ", '-'+ nba + nbo +'\t')
	# 'balzac' and 'bagatelle' are two separate entries: without the comma between
	# them Python joins the adjacent literals into the single word 'balzacbagatelle'
	cities = ('paris', 'chtd', 'ftb', 'fontainebleau', 'avon', 'rueil', 'angers', 'cmaman', 'cpapa', 'issy', 'blr', 'jplantes', 'parc balzac', 'balzac', 'bagatelle', 'teiffel', 'boulogne', 'bboulogne', 'chateau', 'château', 'lengelen')
	for city in cities:
		fileA.replace (city +" ", city +'\t')
		fileA.replace (" "+ city, '\t'+ city)
	# the city pass may have put tabs around these short link words; restore the spaces
	pronoms = ('de', 'du', 'au', 'aux')
	for pronom in pronoms:
		fileA.replace ('\t'+ pronom +'\t'," "+ pronom +" ")
		fileA.replace (" "+ pronom +'\t'," "+ pronom +" ")
		fileA.replace ('\t'+ pronom +" "," "+ pronom +" ")
	citiesAbreviation ={ 'chateau': 'château', 'chtd': 'châteaudun', 'ftb': 'fontainebleau', 'cmaman': 'intérieur', 'cpapa': 'intérieur', 'blr': 'bois le rois', 'jplantes': 'paris\tjardin des plantes', 'teiffel': 'paris\ttour eiffel', 'bboulogne': 'boulogne\tbois' }
	for abbr, fullName in citiesAbreviation.items(): fileA.replace (abbr, fullName)
	# some expansions start with 'paris' and can follow an existing 'paris' cell
	fileA.replace ('paris\tparis', 'paris')
	fileA.replace ('paris paris', 'paris')
	fileA.text = '\n'.join (sorted (fileA.text.split ('\n')))

def findDatePos (line):
	"""Return the index of the last cell of line whose first two characters are '20'.

	Args:
		line: a row, i.e. a sequence of sliceable cells.

	Returns:
		The index of the last matching cell, or -1 when no cell matches.
	"""
	lastPos = -1
	for pos, cell in enumerate (line):
		# no early exit: a later match overrides an earlier one
		if cell[:2] == '20': lastPos = pos
	return lastPos

def cleanDates():
	"""Move the date cell of each row of the module-level fileA to index 0, then sort fileA.

	A cell is treated as a date when its first two characters are '20'.
	Rows that already start with such a cell, and rows in which findDatePos
	finds no date, are left unchanged.
	"""
	for idx in range (len (fileA)):
		row = fileA[idx]
		if row[0][:2] == '20': continue
		datePos = findDatePos (row)
		# -1 means no date in the row; 0 cannot happen here because of the test above
		if datePos <= 0: continue
		row.insert (0, row.pop (datePos))
	fileA.sort()

def cleanCities():
	"""Move the city cell of each row of the module-level fileA to index 1, then sort fileA.

	Rows whose cell 1 is already a known city are skipped. In the other rows,
	the first cell at index 2 or later that holds a known city is moved to
	index 1. Rows with no such cell, including rows shorter than three cells,
	are left unchanged.
	"""
	cities = ('paris', 'fontainebleau', 'avon', 'rueil', 'angers', 'beaucouzé', 'intérieur', 'issy', 'boulogne', 'châteaudun')
	for l in range (len (fileA)):
		row = fileA[l]
		# the length test keeps empty or one-cell rows from raising an IndexError
		if len (row) >1 and row[1] in cities: continue
		# cell 0 is not searched: cleanDates puts the date there
		for c in range (2, len (row)):
			if row[c] in cities:
				row.insert (1, row.pop (c))
				break
	fileA.sort()

def eraseUrlDoubles():
	"""Remove from fileA.list the rows whose last cell already appears in the reference csv.

	The reference file is read as raw text and the test is a substring search
	of the row's last cell in that text. Each removed row is printed.
	"""
	reference = File ('s/portfolio\\diaporama\\photos-data.csv')
	reference.read()
	# walk from the last row to the first so that pop() never shifts a row still to visit
	for pos in range (len (fileA) -1, -1, -1):
		row = fileA.list[pos]
		if row[-1] not in reference.text: continue
		print (row)
		fileA.list.pop (pos)

def findDatePlaceDoubles():
	"""Print the (cell 0, cell 1) pairs of fileA that also occur in the reference csv.

	The reference file is read as raw text; a pair matches when the string
	cell 0 + tab + cell 1 is a substring of that text.
	"""
	reference = File ('s/portfolio\\diaporama\\photos-data.csv')
	reference.read()
	print ('doublons de lieux et de dates')
	for row in fileA.list:
		needle = row[0] +'\t'+ row[1]
		if needle in reference.text: print (row[0], row[1])

def findDatePlaceDoublesBis():
	"""Print the rows of fileA that match a row of the reference csv, read as a table.

	A row of fileA matches a reference row when its cell 0 is contained in
	the reference cell 0 and both cells 1 are equal. Every matching pair
	prints the cell 0 of fileA, the reference cell 0 and the shared cell 1.
	"""
	reference = FileTable ('s/portfolio\\diaporama\\photos-data.csv')
	reference.read()
	print ('doublons de lieux et de dates')
	for row in fileA.list:
		for ref in reference.list:
			# containment is tested first, equality of cell 1 only afterwards
			if row[0] not in ref[0]: continue
			if row[1] == ref[1]: print (row[0], ref[0], row[1])

def listThemes():
	"""Print each distinct value found in cell 3 of the rows of fileA.list, in sorted order."""
	distinctThemes = { row[3] for row in fileA.list }
	for theme in sorted (distinctThemes): print (theme)

def sortThemes():
	"""Sort the ', '-separated items inside cell 3 of every row of fileA.list, in place."""
	for row in fileA.list:
		row[3] = ", ".join (sorted (row[3].split (", ")))

# Final step of the script: rebind fileA.list to a copy ordered by comparPhotoByCity
# (cells compared in the priority order 1, 2, 0, 4, 3, 5), then call fileA.write().
# NOTE(review): write() presumably saves to the file fileA was loaded from - confirm
# against FileTable. The commented-out line below would change fileA.title first.
fileA.list = sorted (fileA.list, key=cmp_to_key (comparPhotoByCity))
# fileA.title = fileA.title +" bis"
fileA.write()