Add flat vision dataset size computation

It is about 8.7 GB.
This commit is contained in:
Benjamin Loison 2024-03-21 17:15:56 +01:00
parent e23543506a
commit 408b7a2ba9
Signed by: Benjamin_Loison
SSH Key Fingerprint: SHA256:BtnEgYTlHdOg1u+RmYcDE0mnfz1rhv5dSbQ2gyxW8B8
2 changed files with 37 additions and 0 deletions

View File

@@ -0,0 +1 @@
https://lesc.dinfo.unifi.it/VISION/dataset/

View File

@@ -0,0 +1,36 @@
import requests
from lxml import html
# Root directory listing of the dataset; the script below treats every entry
# of this listing as one phone folder.
url = "https://lesc.dinfo.unifi.it/VISION/dataset/"
# Multipliers for the one-letter unit suffixes of the listing's size column.
# NOTE(review): Apache's autoindex appears to abbreviate sizes in powers of
# 1024; the decimal multipliers are kept as originally written - confirm which
# convention is wanted for the reported total.
SIZE_UNIT_MULTIPLIERS = {
    'K': 1_000,
    'M': 1_000_000,
    'G': 1_000_000_000,
    'T': 1_000_000_000_000,
}


def parseEntrySize(entrySize):
    """Convert one size cell of the directory listing to a number of bytes.

    Args:
        entrySize: Text of the size column, e.g. `'4.1M'`, `'512'` or `'-'`.

    Returns:
        The size in bytes as an `int`, or the string `'-'` unchanged when the
        listing shows no size for the entry.

    Raises:
        ValueError: If the numeric part of the cell cannot be parsed.
    """
    entrySize = entrySize.strip()
    if entrySize == '-':
        return entrySize
    sizeUnit = entrySize[-1:]
    if sizeUnit in SIZE_UNIT_MULTIPLIERS:
        # `round` rather than `int`: `4.1 * 1_000_000` is `4099999.9999999995`
        # as a float and would otherwise be truncated to 4099999.
        return round(float(entrySize[:-1]) * SIZE_UNIT_MULTIPLIERS[sizeUnit])
    # No unit suffix: the cell is a plain number of bytes.
    return round(float(entrySize))


def getFolderEntries(url):
    """Fetch an HTML directory listing and return its entries.

    Args:
        url: Address of the directory listing page.

    Returns:
        A list of `[entryName, entrySize]` pairs, where `entrySize` is what
        `parseEntrySize` returns for the entry's size column.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # A timeout avoids hanging forever on an unresponsive server, and the
    # status check avoids silently parsing an error page as an empty listing.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    tree = html.fromstring(response.text)
    # Remove legend, design and `Parent Directory` entries.
    # The last row is dropped as well (presumably the bottom separator row).
    entriesLines = tree.xpath('//tr')[3:-1]
    entries = []
    for entryLine in entriesLines:
        entryColumns = entryLine.xpath('td')
        entryName = entryColumns[1].text_content()
        entrySize = parseEntrySize(entryColumns[3].text_content())
        entries.append([entryName, entrySize])
    return entries
# Sum the sizes of all `images/flat/` photos over every phone folder of the
# dataset, printing each phone name and each photo along the way.
totalPhotoSizes = 0
# `url` already ends with a slash; strip it so the joined URLs below do not
# contain a double slash.
baseUrl = url.rstrip('/')
phoneFolders = getFolderEntries(url)
for phoneFolder, _ in phoneFolders:
    # Folder names are listed with a trailing slash.
    phoneName = phoneFolder.rstrip('/')
    print(phoneName)
    phonePhotos = getFolderEntries(f'{baseUrl}/{phoneName}/images/flat/')
    for phonePhotoName, phonePhotoSize in phonePhotos:
        print(phonePhotoName, phonePhotoSize)
        # Entries without a size (listed as `-`) are not files; skip them
        # instead of failing on `int += str`.
        if isinstance(phonePhotoSize, int):
            totalPhotoSizes += phonePhotoSize
print(totalPhotoSizes)