2022-09-03 20:20:01 +00:00
# Author: Scito (https://scito.ch)
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
2022-12-30 19:37:38 +00:00
from __future__ import annotations # for compatibility with PYTHON < 3.11
2022-09-03 20:20:01 +00:00
import csv
2022-12-26 17:31:09 +00:00
import glob
import io
2022-09-03 20:20:01 +00:00
import json
import os
2022-12-26 17:31:09 +00:00
import re
2022-09-09 11:13:13 +00:00
import shutil
2022-09-03 21:47:43 +00:00
import sys
2022-12-29 20:29:20 +00:00
import pathlib
2022-12-30 00:22:05 +00:00
from typing import BinaryIO , Any , Union , List
2022-12-29 22:17:31 +00:00
# Types
# PYTHON < 3.10: Workaround for str | pathlib.Path
# Alias used to annotate parameters that take a filesystem path given either
# as a plain string or as a pathlib.Path object.
PathLike = Union [ str , pathlib . Path ]
2022-09-03 21:47:43 +00:00
# Ref. https://stackoverflow.com/a/16571630
2022-12-30 00:22:05 +00:00
# PYTHON 3.11: class Capturing(list[Any]):
class Capturing(List[Any]):
    """Capture everything written to stdout and stderr as a list of lines.

    While the context is active, ``sys.stdout`` and ``sys.stderr`` are
    replaced by in-memory text buffers. On exit the original streams are
    restored and the captured text is appended to this list, split into
    lines: first all stdout lines, then all stderr lines.

    Usage:
        with Capturing() as output:
            print("Output")
    """

    # TODO remove type ignore if fixed, see https://github.com/python/mypy/issues/11871, https://stackoverflow.com/questions/72174409/type-hinting-the-return-value-of-a-class-method-that-returns-self
    def __enter__(self):  # type: ignore
        """Redirect stdout and stderr to fresh buffers and return this list."""
        self._stdout = sys.stdout
        sys.stdout = self._stringio_std = io.StringIO()
        self._stderr = sys.stderr
        sys.stderr = self._stringio_err = io.StringIO()
        return self

    def __exit__(self, *args: Any) -> None:
        """Restore the original streams and store the captured lines.

        Returns None, so an exception raised inside the ``with`` body is
        not suppressed.
        """
        # Reinstate the real streams before touching the buffers: if the code
        # under test closed a redirected stream, getvalue() raises ValueError
        # and the process must not be left writing to a dead buffer.
        sys.stdout = self._stdout
        sys.stderr = self._stderr
        try:
            self.extend(self._stringio_std.getvalue().splitlines())
            self.extend(self._stringio_err.getvalue().splitlines())
        finally:
            # free up some memory
            del self._stringio_std
            del self._stringio_err
2022-09-03 20:20:01 +00:00
2022-12-29 22:17:31 +00:00
def file_exits(file: PathLike) -> bool:
    """Return True if ``file`` is the path of an existing regular file."""
    is_regular_file = os.path.isfile(file)
    return is_regular_file
2022-12-29 22:17:31 +00:00
def remove_file(file: PathLike) -> None:
    """Delete ``file`` if it is an existing regular file.

    Paths that do not exist or that are not regular files (for example
    directories) are left untouched and no error is raised.
    """
    if not os.path.isfile(file):
        return
    try:
        os.remove(file)
    except FileNotFoundError:
        # The file disappeared between the check and the removal (e.g. a
        # parallel cleanup); the goal "file is gone" is already met.
        pass
2022-12-29 20:29:20 +00:00
def remove_files(glob_pattern: str) -> None:
    """Delete every path that matches the glob pattern ``glob_pattern``."""
    matching_paths = glob.glob(glob_pattern)
    for matching_path in matching_paths:
        os.remove(matching_path)
2022-09-09 11:13:13 +00:00
2022-12-29 22:17:31 +00:00
def remove_dir_with_files(dir: PathLike) -> None:
    """Recursively delete ``dir`` and its content; do nothing if the path does not exist."""
    if not os.path.exists(dir):
        return
    shutil.rmtree(dir)
2022-09-03 20:20:01 +00:00
2022-12-30 00:22:05 +00:00
def read_csv(filename: str) -> List[List[str]]:
    """Read the UTF-8 encoded CSV file ``filename``.

    Returns a list of rows; each row is the list of its field strings.
    """
    # newline='' leaves line-ending handling to the csv module.
    with open(filename, "r", encoding="utf-8", newline='') as csv_file:
        return list(csv.reader(csv_file))
2022-12-30 00:22:05 +00:00
def read_csv_str(data_str: str) -> List[List[str]]:
    """Parse CSV text held in ``data_str``.

    Returns a list of rows; each row is the list of its field strings.
    An empty string yields an empty list.
    """
    # Feed the reader lines that still carry their terminators (newline=''
    # disables newline translation). Splitting with str.splitlines() would
    # drop them, so a line break inside a quoted field would be lost and the
    # two halves of the field glued together.
    return list(csv.reader(io.StringIO(data_str, newline='')))
2022-12-29 20:29:20 +00:00
def read_json(filename: str) -> Any:
    """Parse the UTF-8 encoded JSON file ``filename`` and return the decoded value."""
    with open(filename, "r", encoding="utf-8") as json_file:
        parsed = json.load(json_file)
    return parsed
2022-09-03 21:47:43 +00:00
2022-12-29 20:29:20 +00:00
def read_json_str(data_str: str) -> Any:
    """Parse the JSON document in ``data_str`` and return the decoded value."""
    parsed = json.loads(data_str)
    return parsed
2022-12-18 18:24:07 +00:00
2022-12-30 00:22:05 +00:00
def read_file_to_list(filename: str) -> List[str]:
    """Read the UTF-8 encoded text file ``filename``.

    Returns its lines as a list; each line keeps its trailing newline.
    """
    with open(filename, "r", encoding="utf-8") as text_file:
        return list(text_file)
2022-12-29 20:29:20 +00:00
def read_file_to_str(filename: str) -> str:
    """Read the UTF-8 encoded text file ``filename`` and return its whole content as one string."""
    with open(filename, "r", encoding="utf-8") as text_file:
        return text_file.read()
2022-12-24 00:59:35 +00:00
2022-12-29 14:52:17 +00:00
2022-12-29 20:29:20 +00:00
def read_binary_file_as_stream(filename: str) -> BinaryIO:
    """Load ``filename`` completely and return its bytes wrapped in an in-memory binary stream.

    The file itself is closed again before the stream is returned.
    """
    with open(filename, "rb") as binary_file:
        content = binary_file.read()
    return io.BytesIO(content)
2022-12-26 17:31:09 +00:00
2022-12-29 14:52:17 +00:00
2022-12-29 20:29:20 +00:00
def replace_escaped_octal_utf8_bytes_with_str(str: str) -> str:
    r"""Decode octal-escaped UTF-8 byte sequences found in ``name: ...`` lines.

    Each run of backslash-escaped octal bytes (e.g. ``\303\244``) that occurs
    after ``name: `` on a line is decoded as UTF-8, and every occurrence of
    that escaped run in the text is replaced by the decoded characters. One
    "Replace ... by ..." message is printed per run found.

    Args:
        str: the text to process (the parameter name shadows the builtin
            and is kept for backward compatibility).

    Returns:
        The text with the escaped runs replaced.

    Raises:
        ValueError: an escape contains a digit that is not octal (8 or 9).
        OverflowError: an escape value does not fit into one byte.
        UnicodeDecodeError: the bytes of a run are not valid UTF-8.
    """
    text = str  # local alias so the builtin-shadowing parameter is not rebound

    # Collect all escaped runs before replacing anything.
    byte_sequences = []
    for name_line in re.findall(r'name: .*$', text, flags=re.MULTILINE):
        byte_sequences.extend(re.findall(r'((?:\\[0-9]+)+)', name_line))

    # Replace the longest runs first: replacing a short run that is a prefix
    # of a longer one would tear the longer run apart and leave its remaining
    # bytes undecoded. The sort is stable, so equally long runs keep the order
    # in which they were found.
    byte_sequences.sort(key=len, reverse=True)

    for byte_sequence in byte_sequences:
        raw_bytes = b''.join(
            int(octal_digits, 8).to_bytes(1, 'little')
            for octal_digits in byte_sequence.split('\\')
            if octal_digits
        )
        unicode_str = raw_bytes.decode('utf-8')
        print("Replace '{}' by '{}'".format(byte_sequence, unicode_str))
        text = text.replace(byte_sequence, unicode_str)
    return text
2022-12-29 20:29:20 +00:00
def quick_and_dirty_workaround_encoding_problem(str: str) -> str:
    """Blank out every ``name: "encoding: ...`` entry in the given text.

    Everything from ``name: "encoding: `` up to the end of that line is
    removed; the line break itself and any text before the match are kept.
    """
    encoding_name_entry = re.compile(r'name: "encoding: .*$', flags=re.MULTILINE)
    return encoding_name_entry.sub('', str)