python s3fs

Python

python s3fs

Kyle79 2020. 7. 22. 15:11

https://pypi.org/project/s3fs/#description

s3fs

Convenient Filesystem interface over S3

pypi.org

import s3fs

# Render the DataFrame as CSV text and encode it to bytes, because the
# S3 file below is opened in binary ('wb') mode.
bytes_to_write = df.to_csv(None).encode()
fs = s3fs.S3FileSystem(key=key, secret=secret)
with fs.open('s3://bucket/path/to/file.csv', mode='wb') as s3_file:
    s3_file.write(bytes_to_write)

s3fs는 파일을 열 때 rb 및 wb 모드만 지원하므로, 이처럼 bytes_to_write로 인코딩하는 작업을 수행한 것입니다.

import s3fs

s3 = s3fs.S3FileSystem(anon=False)

# Python 3: open in text mode 'w'. Python 2: use 'wb' instead.
# pandas writes directly into the S3 file object.
with s3.open('<bucket-name>/<filename>.csv', mode='w') as s3_file:
    df.to_csv(s3_file)

StringIO의 문제점은 메모리를 많이 잡아먹는다는 것입니다. 이 방법을 사용하면 파일 전체를 문자열로 변환한 다음 s3에 쓰는 대신, 데이터를 s3으로 바로 스트리밍합니다. pandas 데이터프레임과 그 문자열 복사본을 동시에 메모리에 보관하는 것은 매우 비효율적으로 보입니다.

python 3.x 는 binary 를 지정하지 않아도 된다.

import s3fs
import pandas as pd
fs = s3fs.S3FileSystem(anon=False)

# CSV: the object key points at the CSV file (the earlier example
# mistakenly reused the pickle key 'foo.pkl' here).
with fs.open('mybucket/path/to/object/foo.csv') as f:
    df = pd.read_csv(f)

# Pickle
# NOTE: unpickling can execute arbitrary code; only load pickle objects
# from buckets you trust.
with fs.open('mybucket/path/to/object/foo.pkl') as f:
    df = pd.read_pickle(f)

s3 = s3fs.S3FileSystem(anon=False)  # uses default credentials

# Payload of 2 * 2**20 'a' characters, written twice below.
payload = 'a' * (2 * 2**20)
with s3.open('mybucket/new-file', mode='w') as out:
    out.write(payload)
    out.write(payload)
# Leaving the with-block flushes the data and closes the file.

s3.du('mybucket/new-file')

import csv

fieldnames = ['This', 'aNew']
fs = s3fs.S3FileSystem(key=ACCESS_KEY, secret=SECRET_KEY)
# newline='' lets the csv module control line endings itself; without it
# rows can end in '\r\r\n' on platforms whose line separator is '\r\n'.
with fs.open('s3://dhk-dp/file.csv', encoding='euc-kr', mode='w', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    # Only a data row is written; no header row is emitted.
    writer.writerow({'This': '제품', 'aNew': 'Row'})