#!/usr/bin/env python3
# File: fuss.py
# Location: fuss source
# Author: bgstack15
# SPDX-License-Identifier: GPL-3.0
# Startdate: 2020-12-21
# Title: File Upload and Storage Service main app
# Purpose: Demonstrate web technologies
# Usage: Use /usr/sbin/fuss.bin or system service
# References:
# Improve:
# Dependencies:
#    python3-flask-script, python3-magic, python3-uwsgidecorators
# vim:set ts=3 sw=3 sts=3 et:
from flask import Flask, make_response, Response, abort, send_from_directory, render_template, request, redirect, url_for
from werkzeug.datastructures import FileStorage
from werkzeug.utils import secure_filename
from werkzeug.middleware.proxy_fix import ProxyFix
from flask_script import Manager, Server # python3-flask-script
import magic # python3-magic, see below
from hashlib import sha256
from mimetypes import guess_extension
import os, sys, time, json, base64
from datetime import datetime, date
from uwsgidecorators import * # python3-uwsgidecorators
from logging.config import dictConfig

# The Flask application object and the flask-script manager wrapping it.
app = Flask(__name__)
manager = Manager(app)

############################################
# Functions

def now():
    """Return the current time as whole seconds since the Unix epoch."""
    # time.time() is already epoch-based, so there is no need to go through
    # a naive local datetime, whose conversion back with mktime() is
    # ambiguous around daylight-saving transitions.
    return int(time.time())


def all_same(items):
    """Return True when every element of *items* equals the first one.

    An empty sequence yields True because there is nothing to contradict it.
    """
    return all(x == items[0] for x in items)


def trim_dict(a, max_length=40):
    """
    Shorten long strings in a dictionary, primarily for displaying
    partial contents of icondata metadata.

    Returns a new dictionary; *a* is not modified. String values longer
    than *max_length* are cut to *max_length* characters followed by
    "...". Every other value is copied unchanged, so the result always
    has the same keys as the input.
    """
    b = {}
    for key, value in a.items():
        if isinstance(value, str) and len(value) > max_length:
            b[key] = value[0:max_length] + "..."
        else:
            b[key] = value
    return b


# load config file
# Load it from the current directory, which is not FHS-compliant
#conf_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),"fuss.conf")
conf_file = "/etc/fuss.conf"
# silent=True: a missing config file is not an error at this point.
app.config.from_pyfile(conf_file, silent=True)
if "WSGI_LOGGING" in app.config:
    dictConfig(app.config["WSGI_LOGGING"])

# Load gtk only if requested, because who really wants to load that if it is not necessary?
if "ICONS" in app.config and app.config["ICONS"]:
    import gi # python3-gi
    gi.require_version("Gtk", "3.0")
    from gi.repository import Gio, Gtk

# alias the config values
# The first three are mandatory; a missing key raises KeyError at import time.
up = app.config["UPLOAD_PATH"]
ulp = app.config["UPLOAD_PREFIX"]
dlp = app.config["DOWNLOAD_PREFIX"]
# APP_SERVER and APP_PREFIX are optional and default to the empty string.
aps = app.config.get("APP_SERVER", "")
ap = app.config.get("APP_PREFIX", "")

# Pre-joined URL fragments built from the aliases above.
ap_s = ap.strip("/") + "/"
s_ap_s = "/" + ap.strip("/") + "/"
ulp_s = ulp.strip("/") + "/"
aps_ap = aps.strip("/") + str("/" + ap.strip("/") + "/").replace("//","/")
ap_ulp = "/" + ap.strip("/") + "/" + ulp.strip("/") + "/"
aps_ap_ulp = aps_ap + ulp_s
aps_ap_dlp = aps_ap + dlp.strip("/") + "/"

# these values are for internal use only
app.config["ACCEPTED_FORMATS"] = ["text","json","xml","html","html-long"]
mime_dict = {
    "json": "application/json",
    "text": "text/plain",
    "xml": "application/xml",
    "html": "text/html",
    "html-long": "text/html"
}
# Only these configuration keys are ever printed by the dump_config views.
app.config["VISIBLE_CONFIGS"] = [
    "APP_HOST",
    "APP_PORT",
    "APP_PREFIX",
    "DOWNLOAD_PREFIX",
    "UPLOAD_PREFIX",
    "UPLOAD_PATH",
    "MIMETYPE_BLACKLIST",
    "MAX_FILE_SIZE",
    "MAX_DUPLICATE_NAMES",
    "USE_X_ACCEL_REDIRECT",
    "APP_SERVER",
    "LOOP_DELAY",
    "META",
    "META_VISIBLE",
    "META_HEADERS",
    "ICONS",
    "ICON_THEME"
]

try:
    mimedetect = magic.Magic(mime=True, mime_encoding=False)
    USE_DARWIN_MAGIC = False
except Exception:
    # then perhaps we are using the Fedora-packaged magic which for some reason is from a different source.
    # Debian uses https://github.com/ahupp/python-magic/
    # Fedora uses http://www.darwinsys.com/file/
    # Catching Exception (not a bare except) keeps the fallback but no
    # longer swallows KeyboardInterrupt/SystemExit during start-up.
    import magic
    USE_DARWIN_MAGIC = True
    #    print("""Error: You have installed the wrong version of the 'magic' module.
#Please install python-magic.""")
#    sys.exit(1)

def detect_from_file(filename):
    """Return the MIME type of the file stored at path *filename*.

    Uses whichever magic binding was selected at import time
    (see USE_DARWIN_MAGIC).
    """
    if not USE_DARWIN_MAGIC:
        # ahupp way
        return mimedetect.from_file(filename)
    return magic.detect_from_filename(filename).mime_type


def detect_from_buffer(buffer):
    """Return the MIME type of the in-memory bytes *buffer*."""
    if not USE_DARWIN_MAGIC:
        return mimedetect.from_buffer(buffer)
    # Mirror detect_from_file(): the darwinsys binding returns a result
    # object, and callers need its MIME string rather than the object.
    return magic.detect_from_content(buffer).mime_type


# PROXY_LAYERS is configurable because the x_for level must be precise.
# Apache needs RequestHeader statements like so, assuming this is in a block with SSLEngine on.
#
#    RequestHeader append X-Forwarded-Prefix "/fuss"
#    RequestHeader set X-Forwarded-Proto "https"
#
pl = app.config.get("PROXY_LAYERS", 0)
app.wsgi_app = ProxyFix(app.wsgi_app,x_for=pl,x_host=pl,x_port=pl,x_prefix=pl,x_proto=pl)


def _first_forwarded_value(value):
    """Return the first entry of a possibly comma-separated forwarded header."""
    return value.replace(", ", "\n").split("\n")[0]


def _discover_server_and_prefix():
    """Return ``(server, prefix)`` describing how the app was accessed.

    Without DISCOVER_URL the configured APP_SERVER / APP_PREFIX aliases
    are returned. With it, the X-Forwarded-* request headers are used,
    falling back to "http://" + Host and "/" when they are absent.
    Must be called inside a request context.
    """
    if not app.config.get("DISCOVER_URL"):
        return aps, ap
    try:
        # Take the first entry of each header before joining them, so a
        # multi-valued proto header cannot swallow the host part.
        server = (
            _first_forwarded_value(request.headers["X-Forwarded-Proto"])
            + "://"
            + _first_forwarded_value(request.headers["X-Forwarded-Host"])
        )
    except KeyError:
        server = "http://" + request.headers["Host"]
    try:
        prefix = _first_forwarded_value(request.headers["X-Forwarded-Prefix"])
    except KeyError:
        prefix = "/"
        app.logger.warning("Failed to get x-forwarded-prefix")
    return server, prefix


def list_of_files(path=None, meta=False):
    """Return the full paths of the entries directly inside *path*.

    path: directory to list; defaults to the configured upload path.
    meta: when false, only non-".meta" entries are returned; when true,
          only the ".meta" entries are.

    Always returns a list. If the directory cannot be read the problem is
    logged and an empty list is returned, so callers can iterate or take
    len() of the result safely.
    """
    if path is None:
        path = up
    want_meta = bool(meta)
    try:
        entries = os.listdir(path)
    except OSError as E:
        app.logger.error("Problem fetching files from path {0}: {1}".format(path,E))
        return []
    # never list meta files even if META_VISIBLE
    return [
        os.path.join(path, entry)
        for entry in entries
        if entry.endswith(".meta") == want_meta
    ]


def stats(file, lastmodified=-1):
    """Return ``(name, size, timestamp, sha256sum, mimetype)`` for *file*.

    file: either an incoming werkzeug FileStorage upload or the path of a
          file on disk.
    lastmodified: for uploads only, the timestamp to report when it is
          greater than zero; otherwise the current time is used.

    Reading an upload consumes its stream; callers that need the data
    again must rewind it.
    """
    if isinstance(file, FileStorage):
        # if file is incoming stream
        data = file.stream.read()
        size = file.content_length if file.content_length > 0 else len(data)
        sha256sum = sha256(data).hexdigest()
        declared = file.content_type
        # Trust the client's content type only when it looks like a real one.
        if not declared or "/" not in declared or declared == "application/octet-stream":
            mimetype = detect_from_buffer(data)
        else:
            mimetype = declared
        ts = lastmodified if int(lastmodified) > 0 else now()
        return file.filename, size, ts, sha256sum, mimetype

    # so far, assume it is a regular os PathLike file or string
    name = os.path.basename(file)
    stat = os.stat(file)
    digest = sha256()
    with open(file,"rb") as f:
        # Hash in chunks so large stored files are not loaded into memory at once.
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    return name, stat.st_size, int(stat.st_mtime), digest.hexdigest(), detect_from_file(file)


def pprint(file,format="text",cr=False,file_app_prefix=aps_ap_dlp):
    """Describe one stored file in the requested *format*.

    format: one of ACCEPTED_FORMATS; anything else is logged and treated
            as "text".
    cr: append a line terminator ("text" and "html" formats).
    file_app_prefix: URL prefix used for the link in "html-long" rows.

    Returns a string for "text", "html" and "html-long", a dict for
    "json", and an empty string for "xml".

    NOTE(review): the HTML markup of the "html" and "html-long" strings
    is not visible in the reviewed source (only the placeholders are),
    so the anchor / table-row markup below is reconstructed from the
    format arguments -- confirm against the templates and stylesheets.
    """
    from html import escape # stdlib; local import keeps this block self-contained

    if format not in app.config["ACCEPTED_FORMATS"]:
        app.logger.warning("format {0} is not in {1}".format(format,app.config["ACCEPTED_FORMATS"]))
        format = "text"
    name, size, ts, sha256sum, mimetype = stats(file)
    use_meta = bool(app.config.get("META"))
    metadata = meta(file) if use_meta else {}

    if format == "text":
        return "{0},{1},{2},{3},{4}".format(name,size,ts,sha256sum,mimetype) + ("\n" if cr else "")

    if format == "html":
        link = '<a href="{0}{1}">{1}</a>'.format(escape(aps_ap_dlp),escape(str(name)))
        return link + ("<br/>\n" if cr else "")

    if format == "html-long":
        href = escape(file_app_prefix)
        label = escape(str(name))
        if use_meta and app.config.get("ICONS"):
            # A file without icon data simply gets an empty image source.
            icon = metadata.get("icondata", "") if isinstance(metadata, dict) else ""
            cells = ['<td><img src="{2}"/><a href="{0}{1}">{1}</a></td>'.format(href,label,escape(str(icon)))]
        else:
            cells = ['<td><a href="{0}{1}">{1}</a></td>'.format(href,label)]
        cells.extend("<td>{0}</td>".format(escape(str(word))) for word in (size,ts,sha256sum,mimetype))
        return "<tr>" + "".join(cells) + "</tr>\n"

    if format == "json":
        return {
            'name': name,
            'size': size,
            'timestamp': ts,
            'sha256sum': sha256sum,
            'mimetype': mimetype
        }

    # "xml" is accepted but has no renderer.
    return ""


def print_files(path=None,format="text",header=False):
    """Render the listing of every file under *path* as one string.

    format: "json", "html", "html-long"; anything else is rendered as
            "text".
    header: include a header row ("html-long" and "text" only).
    """
    files = list_of_files(path)

    if format == "json":
        return json.dumps({ 'files': [pprint(f,format="json",cr=False) for f in files] })

    if format == "html":
        return "".join(pprint(f,format=format,cr=True) for f in files)

    if format == "html-long":
        # discover how the app was accessed, if configured
        _server, _prefix = _discover_server_and_prefix()
        _aps_ap_dlp = aps_ap_dlp
        if app.config.get("DISCOVER_URL"):
            _prefix = str("/" + _prefix.strip("/") + "/").replace("//","/")
            _aps_ap_dlp = _server.strip("/") + str(_prefix + dlp.strip("/") + "/").replace("//","/")
            app.logger.info("Using customized from headers, aps_ap_dlp: {0}".format(_aps_ap_dlp))
        # NOTE(review): this literal is reproduced exactly as it appears in
        # the reviewed source. It is formatted with _prefix but shows no
        # placeholder, so markup that referenced the prefix is presumably
        # missing here -- restore it from the original.
        parts = ["\n\n".format(_prefix)]
        # NOTE(review): table markup reconstructed; see pprint().
        parts.append("<table>\n")
        if header:
            parts.append("<tr>")
            parts.extend("<th>{0}</th>".format(word) for word in ['link','size','timestamp','sha256sum','mimetype'])
            parts.append("</tr>\n")
        parts.extend(pprint(f,format=format,cr=True,file_app_prefix=_aps_ap_dlp) for f in files)
        parts.append("</table>")
        return "".join(parts)

    # assume "text"
    parts = []
    if header:
        parts.append("# {0},{1},{2},{3},{4}\n".format(
            "filename", "size", "timestamp", "sha256sum", "mimetype"
        ))
    parts.extend(pprint(f,cr=True) for f in files)
    return "".join(parts)


def print_config(app,format="text",header=False):
    """Render the VISIBLE_CONFIGS subset of *app*.config.

    Returns a JSON document for format "json", otherwise KEY=value lines.
    *header* is accepted for symmetry with print_files() and is unused.
    """
    visible = app.config["VISIBLE_CONFIGS"]
    shown = {item: app.config[item] for item in app.config if item in visible}
    if format == "json":
        return json.dumps({'config': shown})
    # assume text
    return "".join(str(item) + "=" + str(value) + "\n" for item, value in shown.items())


# will only be called if META=true
def meta(file, variable=None, value=None, debug=False):
    """Read, and optionally update, the metadata stored beside *file*.

    The metadata lives in a JSON file named ".<basename>.meta" in the
    same directory as *file*.

    variable / value: when *value* is truthy, ``metadata[variable] = value``
        is stored and the metadata file is rewritten.
    debug: log the metadata while doing so.

    Returns ``metadata[variable]`` when *variable* is given and present,
    otherwise the whole metadata dictionary (empty if none exists yet).
    Errors are logged rather than raised.
    """
    metafile = os.path.join(os.path.dirname(file), "." + os.path.basename(file) + ".meta")
    metadata = {}
    try:
        try:
            with open(metafile,"r") as mf:
                metadata = json.loads(mf.read())
        except (OSError, ValueError):
            # probably no file exists (or it does not hold valid JSON)
            pass
        # if writing to the metadata file
        if value:
            metadata[variable] = value
            if debug:
                app.logger.info(json.dumps(metadata))
            with open(metafile,"w") as mf:
                json.dump(metadata,mf)
        if debug:
            for i in metadata:
                app.logger.info("META DEBUG i={0} value={1}".format(i,metadata[i]))
    except Exception as E:
        app.logger.error("Error happened: {0}".format(E))
    # return a single value from the meta file if it was asked for
    if variable and variable in metadata:
        return metadata[variable]
    # otherwise, return the dictionary
    return metadata


def _numbered_filename(filename, number):
    """Return *filename* with "-<number>" inserted before its last extension."""
    name_array = filename.split(".")
    if len(name_array) > 1:
        name_array[-2] = name_array[-2] + "-" + str(number)
        return '.'.join(name_array)
    return filename + "-" + str(number)


def store_file(file, lastmodified=-1,addr=None):
    """Store an uploaded FileStorage in the upload directory.

    lastmodified: timestamp to apply to the stored file (see stats()).
    addr: client address recorded in the metadata when META is enabled.

    Returns ``(message, status)``: the download URL with 201 when the file
    was written or 301 when identical content was already stored;
    otherwise an explanation with 400 (unusable filename), 409 (too many
    differing files share the name), 413 (too large) or 415 (disallowed
    MIME type).
    """
    skip_write = False
    name, size, ts, sha256sum, mimetype = stats(file, lastmodified)
    try:
        # reset stream so I can read it again here
        file.seek(0)
    except Exception as E:
        # Best effort, as before, but no longer silent.
        app.logger.warning("Unable to rewind upload stream: {0}".format(E))
    data = file.stream.read()
    # format() instead of "+" so a missing name or mimetype cannot raise here
    app.logger.info("{0} {1} {2} {3} {4}".format(name,size,ts,sha256sum,mimetype))

    # Ensure mimetype is allowed
    if mimetype in app.config["MIMETYPE_BLACKLIST"]:
        app.logger.info("Problem with filetype.")
        return "Mimetype {0} is disallowed.\n".format(mimetype), 415
    # WHITELIST functionality would go here if implemented.

    # Verify it is a safe filename
    # manually check for '-' which secure_filename thinks is OK but it is not.
    if name == '-':
        # then the content was piped in to curl -F 'file=@-'
        name = "stdin"
    dest_filename = secure_filename(name or "")
    if not dest_filename:
        # Nothing usable is left of the name; joining it to the upload
        # path would address the upload directory itself.
        return "Invalid filename.\n", 400
    dest_file = os.path.join(up, dest_filename)

    if os.path.exists(dest_file):
        ename, _, _, esum, _ = stats(dest_file)
        if esum == sha256sum:
            skip_write = True
            app.logger.info("Existing file {0} is same as uploaded file.".format(ename))
        else:
            # if file is not identical, increment until it is a new number and then save
            x = 0
            while True:
                x += 1
                if x >= app.config["MAX_DUPLICATE_NAMES"]:
                    app.logger.error("Have to stop trying now.")
                    return "Too many versions already exist for {0}\n".format(dest_filename), 409
                temp_dest_filename = _numbered_filename(dest_filename, x)
                temp_dest_file = os.path.join(up, temp_dest_filename)
                if os.path.exists(temp_dest_file):
                    ename, _, _, esum, _ = stats(temp_dest_file)
                    if esum != sha256sum:
                        # a different file owns this number; try the next one
                        continue
                    # if file is identical, continue silently
                    skip_write = True
                    app.logger.info("Existing file {0} is same as uploaded file.".format(ename))
                else:
                    app.logger.info("Found safe filename {0}".format(temp_dest_file))
                dest_filename = temp_dest_filename
                dest_file = temp_dest_file
                break

    # NOTE(review): the reviewed source has lost its indentation; the
    # timestamp, metadata and "SAVED TO" steps are assumed to belong to
    # the write branch -- confirm.
    if not skip_write:
        if size > app.config["MAX_FILE_SIZE"]:
            return "File size {0} is too large for limit {1}\n".format(size,app.config["MAX_FILE_SIZE"]), 413
        with open(dest_file, "wb") as f:
            f.write(data)
        # write timestamp
        ts = int(ts)
        if ts > 0:
            os.utime(dest_file, (ts,ts))
        if app.config.get("META"):
            meta(dest_file,"address",addr)
            meta(dest_file,"uploaded",now())
        app.logger.info("SAVED TO " + dest_file)

    return url_for("get",filename=dest_filename, _external=True) + "\n", 301 if skip_write else 201


def html_template(filename="",full=False):
    """Render the Jinja template ``<filename>.j2``.

    full: also pass the values that take extra processing (file count,
          human-readable size limit, blacklist and metadata settings).
    """
    # discover how the app was accessed, if configured
    _server, _prefix = _discover_server_and_prefix()

    if not full:
        return render_template(filename + ".j2",
            server = _server,
            prefix = _prefix,
            ulp = ulp
        )

    # takes extra processing
    max_size = app.config["MAX_FILE_SIZE"]
    size_suffix = "B"
    # Step up one unit at a time, and only while the previous step applied.
    for suffix, threshold in (("KB", 1024 * 10), ("MB", 1024 * 10), ("GB", 1024 * 3)):
        if max_size <= threshold:
            break
        max_size = max_size/1024
        size_suffix = suffix
    if size_suffix == "GB":
        max_size = round(max_size, 2)
    else:
        max_size = int(max_size)

    return render_template(filename + ".j2",
        server = _server,
        prefix = _prefix,
        ulp = ulp,
        file_count = len(list_of_files()),
        max_size = str(max_size) + size_suffix,
        mimetype_blacklist = app.config["MIMETYPE_BLACKLIST"],
        max_dupe = app.config["MAX_DUPLICATE_NAMES"],
        meta = app.config.get("META", False),
        meta_visible = app.config.get("META_VISIBLE", False),
        meta_headers = app.config.get("META_HEADERS", []),
        icons = app.config.get("ICONS", False)
    )


def _file_for_metafile(metafile):
    """Return the path of the data file that *metafile* describes.

    Inverse of the naming used by meta(): ".<name>.meta" -> "<name>".
    """
    base = os.path.basename(metafile)
    if base.startswith("."):
        base = base[1:]
    # Slice the suffix off. str.rstrip(".meta") would strip any trailing
    # run of those characters and mangle names such as "beta".
    if base.endswith(".meta"):
        base = base[:-len(".meta")]
    return os.path.join(os.path.dirname(metafile), base)


@timer(app.config["LOOP_DELAY"])
def loop(num):
    """Periodic maintenance task, scheduled through the @timer decorator.

    Task 1 removes metadata files whose data file no longer exists.
    Task 2, when ICONS is enabled, stores a base64 "icondata" entry in the
    metadata of every file that does not have one yet.
    *num* is supplied by the decorator and is unused.
    """
    app.logger.info("Loop starts {0}".format(datetime.today()))

    # Task 1: clean up meta files for files that no longer exist
    for f in list_of_files(meta=True):
        if not os.path.exists(_file_for_metafile(f)):
            try:
                os.remove(f)
            except OSError:
                app.logger.error("Please fix the @timer LOOP_DELAY")

    # Task 2: add icon if undefined
    # FUTURE IMPROVEMENT: PERFORMANCE needs to be improved. For some reason, some meta files have "icondata" values but these values are not recognized by the dict lookup, so it looks up their icontype every single run.
    if not app.config.get("ICONS"):
        return
    # must regenerate list because we might have shortened it
    lof = list_of_files(meta=False)
    if "ICON_THEME" in app.config:
        # this seems to fail silently and just load "hicolor" or something basic, which is good enough.
        Gtk.Settings.get_default().set_property('gtk-icon-theme-name',app.config["ICON_THEME"])
    icon_theme = Gtk.IconTheme.get_default()
    for f in lof:
        name, _, _, _, mimetype = stats(f)
        fmetadata = meta(f)
        fm_trim = trim_dict(fmetadata)
        app.logger.info("FOR FILE " + name)
        app.logger.info("found metadata " + str(fm_trim))
        app.logger.info("SHOWING ITEMS IN fmetadata length {1} for {0}".format(f,len(fmetadata)))
        if "icondata" in fmetadata:
            continue
        app.logger.info("NEED to add icondata for {0}".format(name))
        icon = Gio.content_type_get_icon(mimetype)
        image_file = None
        for entry in icon.to_string().split():
            if entry == "." or entry == "GThemedIcon":
                continue
            try:
                image_file = icon_theme.lookup_icon(entry,32,0).get_filename()
            except Exception:
                # this entry in the list must not have an image
                continue
            if image_file:
                break
        if image_file:
            app.logger.info("Found for {0} icon {1}".format(name,image_file))
            # load data from file, save as base64-encoded image
            # by following symlink, we can avoid mimetype of "inode/symlink"
            followed_link = os.path.realpath(image_file)
            _, _, _, _, image_mime = stats(followed_link)
            with open(image_file,"rb") as i:
                raw = i.read()
            image_base64 = "data:{0};base64,".format(image_mime) + str(base64.b64encode(raw).decode('utf-8'))
            meta(f,"icondata",image_base64,debug=True)
            app.logger.info("Found for {0} file {1}".format(name,image_file))
    # END for f in lof.
@app.route("/")
def root():
    """Serve the landing page with the full set of template values."""
    return html_template("index.html",True)


@app.route("/robots.txt")
def robots():
    """Serve robots.txt from the "static" directory."""
    return send_from_directory("static","robots.txt")


@app.route("/fuss-upload")
def fuss_upload():
    """Serve the fuss-upload helper as a shell script."""
    return send_from_directory('/usr/share/fuss/static',"fuss-upload",mimetype="application/x-shellscript")


# The second rule carries the URL variable that fills the view's
# "format" parameter; the first rule falls back to its default.
@app.route("/dump_files/")
@app.route("/dump_files/<format>")
def show_files(format="text"):
    """List the stored files; unknown formats are served as "text"."""
    if format not in mime_dict:
        format = "text"
    return Response(print_files(up,format=format, header=True), mimetype=mime_dict[format])


@app.route("/dump_config/")
@app.route("/dump_config/<format>")
def show_config(format="text"):
    """Show the visible configuration; unknown formats are served as "text"."""
    if format not in mime_dict:
        format = "text"
    return Response(print_config(app,format=format, header=True), mimetype=mime_dict[format])


# Proxy layer counts that already have a ProxyFix wrapper around the app.
# "pl" is the count applied at start-up, earlier in this file.
_applied_proxy_layer_counts = {pl}


def _apply_discovered_proxy_layers(headers):
    """Wrap the WSGI app in a ProxyFix sized from the forwarded headers.

    Only acts when DISCOVER_URL is enabled and both X-Forwarded-For and
    X-Forwarded-Host are present. Each distinct layer count is applied at
    most once: wrapping again with a count that is already in place would
    not change the outcome, but it would make the middleware chain grow
    with every request.
    """
    if not app.config.get("DISCOVER_URL"):
        return
    if "X-Forwarded-For" not in headers or "X-Forwarded-Host" not in headers:
        return
    layers = len(headers["X-Forwarded-Host"].split(", "))
    if layers in _applied_proxy_layer_counts:
        return
    _applied_proxy_layer_counts.add(layers)
    app.wsgi_app = ProxyFix(app.wsgi_app,x_for=layers,x_host=layers,x_port=layers,x_prefix=layers,x_proto=layers)


def _final_response_code(response_codes):
    """Reduce the per-file status codes of one upload request to one code."""
    if not response_codes:
        # nothing was uploaded at all
        return 400
    if all_same(response_codes):
        return response_codes[0]
    # Mixed results: report the most significant problem first. The codes
    # are the integers returned by store_file().
    for code in (415, 409, 413, 301):
        if code in response_codes:
            return code
    return 201


# Upload route
@app.route("/" + ulp_s, methods=["GET","POST"])
def upload():
    """Show the upload form (GET) or store the posted files (POST).

    Every part named "file" is stored. The response body holds one line
    per file, separated by "<br/>", and the status code summarises all
    per-file results (see _final_response_code).
    """
    headers = dict(request.headers)
    _apply_discovered_proxy_layers(headers)

    if request.method == "GET":
        return html_template("upload.html")

    message = ""
    response_codes = []
    # for a single file upload this would be: request.files["file"]
    for f_count, infile in enumerate(request.files.getlist("file")):
        lastModified = -1
        # the html upload form passes in a cookie
        if "lastmod" in request.cookies:
            try:
                lastModified = int(request.cookies["lastmod"].split(",")[f_count])
            except (IndexError, ValueError):
                # fewer or malformed entries than files: keep "unknown"
                lastModified = -1
        if 'lastModified' in headers:
            lastModified = headers["lastModified"]
        ip_addr = request.remote_addr
        if "HTTP_X_FORWARDED_FOR" in request.environ:
            app.logger.info("Found x-forwarded-for= " + request.environ["HTTP_X_FORWARDED_FOR"])
            ip_addr = request.environ["HTTP_X_FORWARDED_FOR"]
        # store_file() returns (text, status); use the text directly
        # instead of parsing the repr() of the tuple.
        text, code = store_file(infile, lastmodified=lastModified,addr=ip_addr)
        message += text.replace("\n","<br/>")
        response_codes.append(code)

    # Have to figure the best response, when handling multiple file uploads
    if not response_codes:
        return "No files were uploaded.\n", 400
    return message, _final_response_code(response_codes)


@app.route("/template/<filename>")
def get_template(filename):
    """Render the named template with the basic values only."""
    return html_template(filename)


# Download route
@app.route("/" + dlp.strip("/") + "/<filename>")
def get(filename):
    """Send a stored file, or its metadata file when that is permitted.

    Responds 404 when the file does not exist. A request for a ".meta"
    name is redirected to the data file unless META and META_VISIBLE are
    both enabled.
    """
    meta_visible = bool(app.config.get("META") and app.config.get("META_VISIBLE"))
    # safety: remove leading dots
    filename = filename.lstrip(".")
    if filename.endswith(".meta"):
        if meta_visible:
            # the metadata file stored in disk has a filename that starts with a dot
            filename = "." + filename
        else:
            # we are not allowed to dispense metadata
            # Drop only the suffix; replace() would also remove ".meta"
            # from the middle of a name.
            return redirect(url_for("get",filename=filename[:-len(".meta")], _external=True))
    file = os.path.join(up,filename)
    app.logger.info("Dealing with {0}".format(file))
    if not os.path.exists(file):
        abort(404)
    try:
        name, size, ts, sha256sum, mimetype = stats(file)
    except Exception:
        # report the failure as an error, not as a successful response
        return "Unable to get size of file {0}\n".format(file), 500

    # Nginx trick for telling web server to handle the file instead of having this application do it
    if app.config["USE_X_ACCEL_REDIRECT"]:
        response = make_response()
        response.headers["Content-Type"] = mimetype
        response.headers["Content-Length"] = size
        response.headers["X-Accel-Redirect"] = "/" + os.path.join(up, name)
    else:
        response = send_from_directory(up, name, mimetype = mimetype)
    response.headers['Digest'] = "sha-256={0}".format(sha256sum)

    if meta_visible and "META_HEADERS" in app.config:
        metadata = meta(file)
        fm_trim = trim_dict(metadata)
        app.logger.info("found metadata " + str(fm_trim))
        for item in metadata:
            if item in app.config["META_HEADERS"]:
                response.headers[str("X-meta-"+item)] = metadata[item]
    return response


# Set logging level here, and other useful values
app.logger.setLevel("INFO")
app.static_folder=app.config["STATIC_FOLDER"]
app.template_folder=app.config["TEMPLATE_FOLDER"]

if __name__ == "__main__":
    manager.add_command('runserver', Server(host=app.config["APP_HOST"], port=app.config["APP_PORT"]))
    app.run()

# vim: set sw=3 ts=3 sts=3 et: