Skip to main content

Python - The Filesystem

Sham Sui Po, Hong Kong

Github Repository

Pathlib

from pathlib import Path

# Show the names of all attributes and methods available on the Path class
print(dir(Path))
['__bytes__', '__class__', '__class_getitem__', '__delattr__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__fspath__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__rtruediv__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__truediv__', '_accessor', '_cached_cparts', '_cparts', '_drv', '_format_parsed_parts', '_from_parsed_parts', '_from_parts', '_hash', '_make_child', '_make_child_relpath', '_parse_args', '_parts', '_pparts', '_root', '_str', 'absolute', 'anchor', 'as_posix', 'as_uri', 'chmod', 'cwd', 'drive', 'exists', 'expanduser', 'glob', 'group', 'hardlink_to', 'home', 'is_absolute', 'is_block_device', 'is_char_device', 'is_dir', 'is_fifo', 'is_file', 'is_mount', 'is_relative_to', 'is_reserved', 'is_socket', 'is_symlink', 'iterdir', 'joinpath', 'lchmod', 'link_to', 'lstat', 'match', 'mkdir', 'name', 'open', 'owner', 'parent', 'parents', 'parts', 'read_bytes', 'read_text', 'readlink', 'relative_to', 'rename', 'replace', 'resolve', 'rglob', 'rmdir', 'root', 'samefile', 'stat', 'stem', 'suffix', 'suffixes', 'symlink_to', 'touch', 'unlink', 'with_name', 'with_stem', 'with_suffix', 'write_bytes', 'write_text']

Write to File

from pathlib import Path

# Target file and the markdown text that should end up in it
path = Path('files/test1.md')

content = """# Test 1

this is a test
"""

# open() does not create missing folders, so make sure 'files/' exists first
path.parent.mkdir(parents=True, exist_ok=True)

# Only write when the file is absent so existing content is never overwritten
if not path.exists():
    # explicit encoding keeps the result independent of the platform default
    with open(path, 'w', encoding='utf-8') as file:
        file.write(content)

Create Empty Files

# Directory that receives the placeholder files; created if it is missing
root_dir = Path('files')
root_dir.mkdir(parents=True, exist_ok=True)

# Create test0.md, test1.md and test2.md
for i in range(3):
    filepath = root_dir / f'test{i}.md'
    # touch() creates the file when absent and keeps existing content as it is
    filepath.touch()

List Files in Directory

# Print the content of every file located directly inside 'files'
directory = Path('files')

for item in directory.iterdir():
    # iterdir() also yields sub-directories, which cannot be opened for reading
    if item.is_file():
        with open(item, 'r') as file:
            print(file.read())

Rename Files

from pathlib import Path

root_dir = Path('files')

# Replace the file extension

# Swap the '.md' suffix for '.csv' and move the file to the resulting name
source = Path('files/test2.md')
source.rename(source.with_suffix('.csv'))

# Add static prefix to filename

# Snapshot the listing before renaming anything: changing directory entries
# while the iterator is still in use makes it unspecified which paths it yields
file_paths = list(root_dir.iterdir())

for path in file_paths:
    # leave sub-directories untouched, only files get the prefix
    if path.is_file():
        # path.name is the full file name (stem plus suffix)
        new_path = path.with_name('prefix_' + path.name)
        path.rename(new_path)

# Add suffix based on sub directory

# Collect all paths below root_dir up front so that the renames performed in
# the loop cannot influence what the recursive glob yields
recursive_paths = list(root_dir.glob('**/*'))

for path in recursive_paths:
    if path.is_file():
        # name of the directory that directly contains the file
        parent_folder = path.parent.name
        # e.g. 'report.md' inside 'projectA' becomes 'report_projectA.md'
        new_filename = path.stem + '_' + parent_folder + path.suffix
        path.rename(path.with_name(new_filename))

Get File Properties

Path() gives us access to file properties:

# Ask the filesystem for the metadata record of the file and print it as is
path = Path('files/test1.md')
stats = path.stat()
print(stats)
  • Filesize: st_size
  • File last accessed: st_atime
  • File last modified: st_mtime
  • File metadata last changed (Unix) or file created (Windows): st_ctime
os.stat_result(st_mode=33279, st_ino=1809540, st_dev=2050, st_nlink=1, st_uid=1000, st_gid=1001, st_size=25, st_atime=1665554138, st_mtime=1665553956, st_ctime=1665574233)

We can use those properties to further process the file:

from pathlib import Path
from datetime import datetime

path = Path('files/test1.md')
stats = path.stat()

# Get file size and set unit
def get_size(file_bytes=None):
    """Return a file size as a readable string in B, kB or MB.

    file_bytes -- size in bytes; when omitted, the size is taken from the
                  module-level ``stats`` result, as before.

    Sizes of one unit or more are shown with two decimals ('1.50 kB'),
    smaller sizes are shown as whole bytes ('25 B').
    """
    if file_bytes is None:
        file_bytes = stats.st_size

    file_kilobytes = file_bytes / 1024
    file_megabytes = file_kilobytes / 1024

    # '>=' so that exactly 1024 bytes reads '1.00 kB' instead of '1024 B'
    if file_megabytes >= 1:
        return f'{file_megabytes:.2f} MB'
    if file_kilobytes >= 1:
        return f'{file_kilobytes:.2f} kB'
    return f'{file_bytes} B'

# Get date last accessed and process timestamp
# (st_atime holds the access time; st_ctime is a different timestamp)
last_accessed = stats.st_atime
date_accessed = datetime.fromtimestamp(last_accessed).strftime("%Y-%m-%d_%H:%M:%S")

# Append timestamp and file size as a new line at the end of the file
with open(path, 'a') as file:
    file.write('\n' + date_accessed + ' | ' + get_size() + '\n')

Zip and Unzip

Write to zip container and add timestamp:

from pathlib import Path
from datetime import datetime
import zipfile

root_dir = Path('files/projectA')
# get timestamp for archive name
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
archive_path = root_dir / (now + '_archive.zip')

# write all markdown pages in dir to zip container
with zipfile.ZipFile(archive_path, 'w') as zf:
    for path in root_dir.glob('*.md'):
        # store each page under its bare file name; without arcname the member
        # would carry the 'files/projectA/' prefix and unzipping would recreate
        # that folder nesting inside the destination
        zf.write(path, arcname=path.name)
        # optional: delete the source file once it is archived
        # path.unlink()

Unzip all containers found recursively in a directory, each into its own destination folder:

# unzip all containers in root dir recursively
destination_path = Path('files/unzipped')

for path in root_dir.rglob('*.zip'):
    with zipfile.ZipFile(path, 'r') as zf:
        # one folder per archive, named after the archive file without '.zip'
        sub_dir = destination_path / path.stem
        zf.extractall(path=sub_dir)

Downloading Code from Github

Navigate to your project page and select the latest release to get the download URL. Then add a Python script to handle the HTTP download:

import requests
import tarfile
import os

version = '1.4.19'
url = 'https://github.com/hashicorp/terraform-provider-nomad/archive/refs/tags/v' + version + '.tar.gz'
archive_file = 'src/terraform_provider_nomad.tar.gz'

# open() does not create missing folders, so make sure the target exists
os.makedirs('src', exist_ok=True)

# Stop on HTTP errors instead of saving an error page as the archive, and do
# not wait forever on a stalled connection
response = requests.get(url, timeout=30)
response.raise_for_status()

with open(archive_file, 'wb') as file:
    file.write(response.content)

# Use the 'data' extraction filter where this Python version provides it; it
# refuses archive members that would be written outside the destination
extract_options = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}

container_path = 'terraform-provider-nomad-' + version + '/CHANGELOG.md'

# One handle serves both extractions and is closed even when extraction fails
with tarfile.open(archive_file, 'r:gz') as tar:
    # extract all
    tar.extractall('src', **extract_options)
    # single file extraction
    tar.extract(container_path, './version', **extract_options)

# output
os.remove(archive_file)
print('INFO :: All files downloaded and extracted to src/terraform-provider-nomad-' + version)
with open('version/terraform-provider-nomad-' + version + '/CHANGELOG.md', 'r') as file:
    print(file.read())

Find all files and folders that contain a search term:

from pathlib import Path

root_dir = Path('files')
search_term = 'test'

# Walk everything below root_dir, files and folders alike
for path in root_dir.rglob('*'):
    # stem is the last path component without its suffix
    if search_term in path.stem:
        print(path.absolute())

Delete Files

Delete all zip containers recursively from a root directory securely:

import os
from pathlib import Path

root_dir = Path('files/projectA')

# Size of a single write while wiping; keeps memory use flat for big archives
CHUNK_SIZE = 1024 * 1024


def overwrite_with_zeros(path):
    """Replace the whole content of the file at *path* with zero bytes.

    The file is opened with 'r+b' so that it keeps its length and the new
    bytes are written over the existing content. Opening it with 'wb' would
    only truncate the file to length zero without overwriting anything.
    """
    path = Path(path)
    remaining = path.stat().st_size
    with open(path, 'r+b') as file:
        while remaining > 0:
            step = min(CHUNK_SIZE, remaining)
            file.write(bytes(step))
            remaining -= step
        # hand the zeros to the operating system and ask it to write them out
        file.flush()
        os.fsync(file.fileno())


def secure_delete(path):
    """Overwrite the file at *path* with zeros and remove it afterwards.

    NOTE(review): this is best effort; presumably copy-on-write or journaling
    file systems and SSDs can still keep older copies of the data - confirm
    for the target storage.
    """
    path = Path(path)
    overwrite_with_zeros(path)
    path.unlink()


# Collect the matches first so that deleting files cannot affect the search
for path in list(root_dir.rglob('*.zip')):
    # skip folders that merely happen to be named '*.zip'
    if path.is_file():
        secure_delete(path)

And wrapping everything up into a PyQt6 desktop application:

Python - The Filesystem

from PyQt6.QtWidgets import (
QApplication,
QVBoxLayout,
QHBoxLayout,
QPushButton,
QWidget,
QLabel,
QFileDialog
)
from pathlib import Path

# Paths chosen in the file dialog. Starts out empty so that pressing
# "Delete Files" before anything was selected simply does nothing.
filenames = []


def open_files():
    """Let the user pick files and show the chosen paths in the window."""
    global filenames
    # return absolute path of user selected files
    filenames, _ = QFileDialog.getOpenFileNames(window, 'Select files')
    message.setText('\n'.join(filenames))


def _overwrite_with_zeros(path):
    """Replace the whole content of the file at *path* with zero bytes."""
    import os  # imported here because the file-level imports do not include it

    chunk_size = 1024 * 1024
    remaining = path.stat().st_size
    # 'r+b' keeps the file length and writes over the existing content;
    # 'wb' would only truncate the file without overwriting anything
    with open(path, 'r+b') as file:
        while remaining > 0:
            step = min(chunk_size, remaining)
            file.write(bytes(step))
            remaining -= step
        file.flush()
        os.fsync(file.fileno())


def delete_files():
    """Overwrite every selected file with zeros, remove it and report back."""
    global filenames
    for filename in filenames:
        path = Path(filename)
        _overwrite_with_zeros(path)
        path.unlink()
    # forget the deleted paths so a second click does not try them again
    filenames = []
    message.setText('Deleted!')


# Application object and the single top-level window
app = QApplication([])
window = QWidget()
window.setWindowTitle('Destroyer of Worlds')

# Layout skeleton: a vertical stack that holds two horizontal rows
layout_main = QVBoxLayout()
layout_top_container = QHBoxLayout()
layout_bottom_container = QHBoxLayout()
for row in (layout_top_container, layout_bottom_container):
    layout_main.addLayout(row)

# Top row: caption followed by the button that opens the file dialog
description = QLabel('Select files for <font color="red">deletion</font>: ')
open_btn = QPushButton('Open Files')
open_btn.setToolTip('Open files and select for deletions.')
open_btn.clicked.connect(open_files)
for widget in (description, open_btn):
    layout_top_container.addWidget(widget)

# Bottom row: the button that deletes whatever was selected
del_btn = QPushButton('Delete Files')
del_btn.setToolTip('Permanently delete all selected files.')
del_btn.clicked.connect(delete_files)
layout_bottom_container.addWidget(del_btn)

# Label below both rows; the button handlers write their feedback into it
message = QLabel('')
layout_main.addWidget(message)

# Attach the layout, show the window and start the event loop
window.setLayout(layout_main)
window.show()
app.exec()