A friend was looking for a way to list the space usage on a Windows server that only had FTP access. I had written something similar for a project long ago, and polished it up to do the job.
This python script will walk an FTP directory in a top-down, depth-first pattern. It uses the ftplib library, which I believe is built-in to most or all python distributions. Configure the FTP_* variables near the top to set the server, port, user, password, and the delay between each FTP operation (to avoid hammering the server). The script recursively processes directories, creating a dirStruct tuple that contains the following items:
(pwd, subdirList, fileList, sizeInFilesHere, sizeTotal)
- pwd is a string like "/debian/dists/experimental"
- subdirList is a list of tuples just like this one
- fileList is a list of (filename, sizeInBytes) tuples
- sizeInFilesHere is the sum of the sizes of all the files in this directory
- sizeTotal is the sum of the sizes of all the files in this directory and all subdirectories
It also writes data to two CSV files:
- dirStruct_only_folders.csv
- Contains entries for just the directories.
- Local size is the total size of files in that folder (does not count subdirs).
- Total size is the sum of local size and total size of all subdirs.
- dirStruct_complete.csv
- Contains entries for both files and folders.
- Files do not have a total size, only a local size.
#!/usr/bin/env python
#
# A script to recursively walk an FTP server directory structure, recording information
# about the file and directory sizes as it traverses the folders.
#
# Stores output in two CSV files:
#   dirStruct_only_folders.csv
#       Contains entries for just the directories.
#       Local size is the total size of files in that folder (does not count subdirs).
#       Total size is the sum of local size and total size of all subdirs.
#   dirStruct_complete.csv
#       Contains entries for both files and folders.
#       Files do not have a total size, only a local size.
#
# Customize the FTP_* variables below.
#
# Basically does a depth-first search.
#
# Only single-argument print(...) calls are used, so the script runs unchanged
# on both Python 2.7 and Python 3.
#
# Written by Matthew L Beckler, matthew at mbeckler dot org.
# Released into the public domain, do whatever you like with this.
# Email me if you like the script or have suggestions to improve it.

import sys
from ftplib import FTP, all_errors, error_perm
from time import sleep

FTP_SERVER = "ftp.debian.org"
FTP_PORT = "21"  # 21 is the default; a string or an int both work
FTP_USER = ""  # leave empty for anon FTP server
FTP_PASS = ""
FTP_DELAY = 1  # how long to wait between calls to the ftp server


def parseListLine(line):
    """Parse one line of a UNIX-style ("ls -l") FTP directory listing.

    Files look like
        "-rw-r--r-- 1 1176 1176 176158 Mar 30 01:52 README.mirrors.html"
    and directories look like
        "drwxr-sr-x 15 1176 1176 4096 Feb 15 09:22 dists"

    Returns a (name, isDir, sizeBytes) tuple.

    Raises IndexError if the line has fewer than nine fields and ValueError if
    the size field is not an integer.
    """
    # Split at most 8 times so a name containing spaces stays in one piece.
    items = line.split(None, 8)
    kind = items[0][0]
    name = items[8]
    if kind == "l":
        # Symlinks are listed as "name -> target"; keep only the link name.
        name = name.split(" -> ", 1)[0]
    return (name, kind == "d", int(items[4]))


# Since the silly ftp library makes us use a callback to handle each line of text from the server,
# we have a global lines buffer. Clear the buffer variable before doing each call.
lines = []


def appendLine(line):
    """Callback for ftp.dir(): append one listing line to the global buffer."""
    global lines
    lines.append(line)


def getListingParsed(ftp):
    """Return the listing of the current FTP directory, parsed.

    This is a sensible interface to the silly line getting system. The result
    is a list of (name, isDir, sizeBytes) tuples, one per listing line that
    parseListLine() could parse. Lines that cannot be parsed are skipped;
    except for the usual "total N" summary line, each skipped line is reported
    on stderr so that an unsupported listing format does not go unnoticed.
    """
    global lines
    lines = []
    ftp.dir(appendLine)

    parsedLines = []
    for line in lines[:]:
        try:
            parsedLines.append(parseListLine(line))
        except (IndexError, ValueError):
            if not line.startswith("total "):
                sys.stderr.write("Skipping unparseable listing line: %r\n" % (line,))
    return parsedLines


def _joinPath(pwd, name=""):
    """Join pwd and name with exactly one "/" so the root "/" never becomes "//"."""
    return pwd.rstrip("/") + "/" + name


def _csvQuote(text):
    """Wrap text in double quotes for CSV, doubling any embedded double quotes."""
    return '"' + text.replace('"', '""') + '"'


def descendDirectories(ftp):
    """Recursively walk the tree below the current FTP directory.

    Returns a tuple for the current ftp directory, like this:
        (pwd, subdirList, fileList, sizeInFilesHere, sizeTotal)
    pwd is a string like "/debian/dists/experimental"
    subdirList is a list of tuples just like this one
    fileList is a list of (filename, sizeInBytes) tuples
    sizeInFilesHere is the sum of the sizes of all the files in this directory
    sizeTotal is the sum of the sizes of all the files in this directory and
    all subdirectories

    Subdirectories the server refuses to enter are reported on stderr and
    left out of the result.
    """
    sleep(FTP_DELAY)  # be a nice client

    # make our directory structure to return
    pwd = ftp.pwd()
    subdirList = []
    fileList = []
    sizeInFilesHere = 0
    sizeTotal = 0

    print(_joinPath(pwd))

    for name, isDir, sizeBytes in getListingParsed(ftp):
        if name in (".", ".."):
            # Some servers list these; descending into them would never end.
            continue
        if not isDir:
            fileList.append((name, sizeBytes))
            sizeInFilesHere += sizeBytes
            continue

        # is a directory, so recurse
        try:
            ftp.cwd(name)
        except error_perm as err:
            sys.stderr.write("Skipping directory %r: %s\n" % (_joinPath(pwd, name), err))
            continue
        struct = descendDirectories(ftp)
        # Return via the absolute path rather than "..", so we always land
        # back in the directory we started from.
        ftp.cwd(pwd)
        subdirList.append(struct)
        sizeTotal += struct[4]

    # add in the size of all files here to sizeTotal
    sizeTotal += sizeInFilesHere

    return (pwd, subdirList, fileList, sizeInFilesHere, sizeTotal)


def pprintBytes(b):
    """Pretty prints a number of bytes with a proper suffix, like K, M, G, T.

    Values beyond the terabyte range get the suffix "?".
    """
    suffixes = ["", "K", "M", "G", "T", "?"]
    ix = 0
    # ">=" so that exactly 1024 is shown as "1K" rather than "1024".
    while b >= 1024:
        b /= 1024.0
        ix += 1
    s = suffixes[min(len(suffixes) - 1, ix)]
    if int(b) == b:
        return "%d%s" % (b, s)
    return "%.1f%s" % (b, s)


def pprintDirStruct(dirStruct):
    """Pretty print the directory structure. RECURSIVE FUNCTION!

    dirStruct is a tuple as returned by descendDirectories().
    """
    print("{} ({} in {} files here, {} total)".format(
        _joinPath(dirStruct[0]),
        pprintBytes(dirStruct[3]),
        len(dirStruct[2]),
        pprintBytes(dirStruct[4])))
    for ds in dirStruct[1]:
        pprintDirStruct(ds)


def saveDirStructToCSV(dirStruct, fid, includeFiles):
    """Save the directory structure to a CSV file. RECURSIVE FUNCTION!

    dirStruct is a tuple as returned by descendDirectories(), fid is an open
    file-like object with a write() method, and includeFiles selects whether
    a row is written for every file in addition to the directory rows.
    File rows leave the "Total size" column empty.
    """
    pwd = dirStruct[0]

    # Info about this directory itself
    fid.write("{},{},{}\n".format(_csvQuote(_joinPath(pwd)), dirStruct[3], dirStruct[4]))

    # Info about files here
    if includeFiles:
        for name, size in dirStruct[2]:
            fid.write("{},{},\n".format(_csvQuote(_joinPath(pwd, name)), size))

    # Info about dirs here, recurse
    for ds in dirStruct[1]:
        saveDirStructToCSV(ds, fid, includeFiles)


def _writeCSV(filename, dirStruct, includeFiles):
    """Write the CSV header row followed by the rows for dirStruct to filename."""
    with open(filename, "w") as fid:
        fid.write("\"Path\",\"Local size\",\"Total size\"\n")
        saveDirStructToCSV(dirStruct, fid, includeFiles=includeFiles)


def main():
    """Connect to the configured server, walk it, print a report and write both CSV files."""
    print("Connecting to FTP server '%s' port %s..." % (FTP_SERVER, FTP_PORT))
    ftp = FTP()
    ftp.connect(FTP_SERVER, int(FTP_PORT))
    try:
        if FTP_USER == "":
            ftp.login()
        else:
            ftp.login(FTP_USER, FTP_PASS)

        print("Walking directory structure...")
        dirStruct = descendDirectories(ftp)
    finally:
        # Say goodbye politely; if the connection is already broken, just
        # close the socket.
        try:
            ftp.quit()
        except all_errors:
            ftp.close()

    print("")
    print("Finished descending directories, here is the info:")
    pprintDirStruct(dirStruct)
    print("")

    FILENAME = "dirStruct_complete.csv"
    print("Saving complete directory info (files and folders) to a CSV file: '%s'" % FILENAME)
    _writeCSV(FILENAME, dirStruct, includeFiles=True)

    FILENAME = "dirStruct_only_folders.csv"
    print("Saving directory info (only folders) to a CSV file: '%s'" % FILENAME)
    _writeCSV(FILENAME, dirStruct, includeFiles=False)


if __name__ == "__main__":
    main()
Sample CSV output:
"Path","Local size","Total size"
"/plugins/",5426535,7594527
"/plugins/foo-1.1.jar",7774,
"/plugins/CHANGELOG.txt",45169,
Local size is just the size of the file itself, or the size of all files in a directory. Total size is the total size of the files in a directory plus the total sizes of all subdirectories. Files do not have a total size entry.