Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implemented multi-archive, downloading of docs, videos and improved naming scheme #3

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 78 additions & 38 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
from multiprocessing import Pool
import shutil

from datetime import datetime
from time import sleep
import yt_dlp
import traceback

vkHandler = None

def init():
Expand Down Expand Up @@ -53,69 +58,104 @@ def handle_command(obj, command):
pic.save(obj)

def handler(dir, max_size = 200*1024*1024):
    """Pack the downloaded files under ``dir`` into size-limited zip archives.

    Only files that sit one level down (``dir/<sub-directory>/<file>``) are
    packed; entries directly inside ``dir`` are ignored, so the archives
    written into ``dir`` itself never end up inside a later archive.

    Args:
        dir: request directory holding the per-owner sub-directories.
        max_size: a new archive is started when adding the next file would
            bring the uncompressed total of the current one to this many
            bytes or more.

    Returns:
        List of archive paths (``dir/arch0.zip``, ``dir/arch1.zip``, ...), in
        creation order. At least one archive is always created, even when
        there is nothing to pack.
    """
    print("handling img request | dir: ", dir)

    # Relative "<sub-directory>/<file>" names; sorted so the archive layout
    # does not depend on the order os.listdir happens to return.
    imgs = []
    for sub in sorted(os.listdir(dir)):
        sub_path = dir + "/" + sub
        if not os.path.isdir(sub_path):
            continue
        for file in sorted(os.listdir(sub_path)):
            if os.path.isfile('{}/{}'.format(sub_path, file)):
                imgs.append('{}/{}'.format(sub, file))

    archives = []

    def start_archive():
        # The next index is simply the number of archives created so far.
        arch_path = f'{dir}/arch{len(archives)}.zip'
        archives.append(arch_path)
        return zipfile.ZipFile(arch_path, 'w')

    cur_size = 0
    archive = start_archive()
    try:
        for img in imgs:
            img_path = f'{dir}/{img}'
            img_size = os.path.getsize(img_path)
            # Rotate only when the current archive already holds something;
            # otherwise a single oversized file would leave an empty archive
            # behind it.
            if cur_size > 0 and (cur_size + img_size) >= max_size:
                archive.close()
                archive = start_archive()
                cur_size = 0
            # NOTE: per-file command processing is disabled in this version.
            #handle_command(f'{dir}/{img}', command)
            archive.write(img_path, img)
            cur_size += img_size
    finally:
        # Closing writes the central directory; do it even when a file
        # fails so the handle is not leaked.
        archive.close()

    return archives

return handler

def img_names_generator():
    """Yield ``str(name) + ".jpg"`` for every value produced by sequence_generator()."""
    for name in sequence_generator():
        yield str(name)+".jpg"

# Handle one request end to end: create a directory under userData/ named
# after a freshly generated id, download the linked files into it, run
# request_handler on that directory, send the returned archive path as an
# attachment, then delete the directory. Progress messages are sent through
# vkHandler before each step.
# NOTE(review): this looks like the pre-change side of the diff (the same
# steps appear inline in bot_loop further down) - confirm whether this
# function still exists after the change.
# `event` is accepted but not used in this body; extra positional arguments
# are forwarded to download_files.
def handle_request(event, links, request_handler, *args):
request_id = str(generate_id())
request_dir = "userData/" + request_id
os.mkdir(request_dir)

vkHandler.send_message('downloading attachments...')
download_files(links, request_dir, *args)
vkHandler.send_message('processing files...')
arch_path = request_handler(request_dir)
vkHandler.send_message('uploading archive...')
vkHandler.send_message(attachment = arch_path)
# Not reached when any step above raises, so the directory is then left behind.
shutil.rmtree(request_dir)

def download_file(kwargs):
    """Download one attachment described by the ``kwargs`` dict.

    Keys read from ``kwargs``:
        type: 'photo' or 'doc' (fetched over HTTP with requests) or 'video'
            (fetched with yt_dlp); any other type is ignored.
        url: source URL; a video whose url is None is skipped.
        path: directory the file is written into (created when missing).
        name: target file name. A name ending in '.' has no extension yet
            and gets 'jpg' (photo/doc) or 'mp4' (video) appended.

    Raises:
        requests.HTTPError: when the photo/doc request returns an error status.
    """
    # Names can come from message-supplied titles; neutralise path separators
    # so a title cannot point outside kwargs['path'].
    name = str(kwargs['name']).replace('/', '_').replace('\\', '_')
    target_dir = kwargs['path']

    if kwargs['type'] in ['photo', 'doc']:
        if name.endswith('.'):
            name += 'jpg'
        resp = requests.get(kwargs['url'])
        # Fail loudly instead of saving an HTML error page under the file name.
        resp.raise_for_status()
        # exist_ok: several pool workers may create the same directory at once.
        os.makedirs(target_dir, exist_ok = True)
        with open(target_dir + "/" + name, "wb") as writer:
            writer.write(resp.content)

    elif kwargs['type'] == 'video' and kwargs['url'] is not None:
        if name.endswith('.'):
            name += 'mp4'
        os.makedirs(target_dir, exist_ok = True)
        ydl_opts = {'outtmpl': target_dir + "/" + name, 'quiet': True}
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([kwargs['url']])

def _attachment_url(attachment):
    # Use the explicit URL when present; videos without one get a vk.com page URL.
    if 'url' in attachment:
        return attachment['url']
    if attachment['type'] == 'video':
        return 'https://vk.com/video{}_{}'.format(attachment['owner_id'], attachment['id'])
    return None


def _attachment_name(attachment):
    # Build the target file name; a trailing '.' means "extension not known yet".
    title = attachment.get('title')
    ext = attachment.get('ext')
    if title is not None and ext is not None:
        if title.lower().endswith("." + ext.lower()):
            return title
        return '{}.{}'.format(title, ext)
    if title is not None:
        return '{}.'.format(title)
    return '{:%Y-%m-%d_%H:%M:%S}_{}.'.format(datetime.fromtimestamp(attachment['date']), attachment['id'])


def download_files(attachments, path = ".", name_generator = None):
    """Download all ``attachments`` in parallel into ``path/<owner_id>/``.

    Args:
        attachments: dicts with at least 'type' and 'owner_id'; 'url',
            'title', 'ext', 'id' and 'date' are read when needed to derive
            the download URL and the file name.
        path: base directory; each file goes into a sub-directory named
            after the attachment's owner_id.
        name_generator: not used by this implementation; kept so existing
            positional callers keep working.
    """
    if not attachments:
        return

    jobs = [{
        'type': attachment['type'],
        'url': _attachment_url(attachment),
        'path': '{}/{}'.format(path, attachment['owner_id']),
        'name': _attachment_name(attachment)
    } for attachment in attachments]

    # os.cpu_count() may return None; leave one core free but always keep
    # at least one worker.
    workers = max(min(len(jobs), (os.cpu_count() or 1) - 1), 1)
    # map() blocks until every job has finished, so releasing the pool on
    # exit from the with-block cannot cut a download short.
    with Pool(workers) as pool:
        pool.map(download_file, jobs)


def bot_loop():
    """Process incoming events from vkHandler.listen() one by one.

    For every MESSAGE_NEW event a directory is created under userData/,
    the message attachments are downloaded into it, packed by the request
    handler selected for the message text, and the resulting archives are
    sent back as a reply. The directory is removed afterwards whether or
    not processing succeeded.
    """
    print("Bot loop started.")
    for event in vkHandler.listen():
        if event.type != vkHandler.longpoll.VkBotEventType.MESSAGE_NEW:
            continue

        attachments, command, message_id = vkHandler.get_attachments(), vkHandler.get_command(), vkHandler.get_message_id()
        request_id = str(generate_id())
        request_dir = "userData/" + request_id
        os.mkdir(request_dir)

        try:
            request_handler = get_img_request_handler(command)
            vkHandler.send_message('downloading attachments...', reply_to = message_id)
            download_files(attachments, request_dir, img_names_generator())
            vkHandler.send_message('processing files...', reply_to = message_id)
            archives = request_handler(request_dir)
            vkHandler.send_message('uploading archives...', reply_to = message_id)
            vkHandler.send_message(attachments = archives, reply_to = message_id)

        # Exception, not BaseException: KeyboardInterrupt / SystemExit must
        # be able to stop the bot instead of being reported to the user.
        except Exception:
            tb_format = traceback.format_exc()
            # Log first, so the traceback survives even if the reply fails.
            print(tb_format)
            vkHandler.send_message(
                "An error occurred while processing your request.\n{}".format(tb_format.splitlines()[-1]),
                reply_to = message_id
            )
        finally:
            shutil.rmtree(request_dir)

def main():
    """Initialise the bot and keep the event loop running.

    bot_loop() is restarted after a short pause whenever it raises an
    ordinary exception. KeyboardInterrupt and SystemExit are not caught,
    so the process can still be stopped.
    """
    init()
    print("Bot successfully initialized. Starting bot loop.")
    while True:
        try:
            sleep(1/10)
            bot_loop()
        except Exception as e:
            print(e)
            print("An error occurred during events processing. Restarting...")
            sleep(1/10)


if __name__ == '__main__':
    main()
Expand Down
145 changes: 117 additions & 28 deletions vk.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import vk_api.bot_longpoll as longpoll
from random import randint

from datetime import datetime

class vk_wrapper:

def __init__(self, access_token, group_id, api_version = '5.103'):
Expand Down Expand Up @@ -34,41 +36,103 @@ def get_full_message(event, vk_main):
return vk_main.method('messages.getById', params)['items'][0]

@staticmethod
def get_message_attachments(message):
    """Collect the attachments of ``message`` and of every message forwarded in it.

    Forwarded messages are followed to any depth. The result lists a
    message's own attachments first, followed by those of each forwarded
    message in order. A new list is returned; ``message`` is not modified.
    Messages without an 'attachments' or 'fwd_messages' key contribute
    nothing for that key.
    """
    collected = []
    pending = [message]
    while pending:
        current = pending.pop()
        collected.extend(current.get('attachments', []))
        # Pushed in reverse so the first forwarded message is popped first.
        pending.extend(reversed(current.get('fwd_messages', [])))

    return collected

@staticmethod
def filter_attachments(attachments, at_type):
    """Return the payloads of the attachments whose type is wanted.

    Args:
        attachments: dicts with a 'type' key and the payload stored under
            the key named by that type (e.g. ``{'type': 'photo', 'photo': {...}}``).
        at_type: a single type name or an iterable of type names.

    Returns:
        A list of payload copies, each with 'type' set to the attachment
        type. The input dicts are left untouched.
    """
    # A bare string is one type name; without this, `in` would do a
    # substring match against it.
    wanted = (at_type,) if isinstance(at_type, str) else tuple(at_type)

    result = []
    for attachment in attachments:
        kind = attachment['type']
        if kind in wanted:
            result.append({**attachment[kind], 'type': kind})

    return result

@staticmethod
def get_attachment(attachment):
    """Reduce a photo, doc or video payload to the fields needed for download.

    Args:
        attachment: payload dict carrying 'type', 'id', 'owner_id' and
            'date', plus the type-specific fields read below.

    Returns:
        A dict with 'id', 'owner_id', 'date' and 'type', plus
        * photo: 'url' - ``orig_photo['url']`` when present, otherwise the
          URL of the best entry in 'sizes';
        * doc: 'url' - the doc's own 'url' when present, otherwise the 'src'
          of the best preview size - together with 'title' and 'ext';
        * video: 'title' (None when absent).
        None for any other type.
    """
    def best_size(sizes, order):
        # Later entries in `order` are preferred; a size type missing from
        # `order` ranks lowest instead of raising.
        rank = {size_type: position for position, size_type in enumerate(order)}
        return max(sizes, key = lambda size: rank.get(size['type'], -1))

    kind = attachment['type']
    if kind not in ('photo', 'doc', 'video'):
        return None

    info = {
        'id': attachment['id'],
        'owner_id': attachment['owner_id'],
        'date': attachment['date'],
    }

    if kind == 'photo':
        url = (attachment.get('orig_photo') or {}).get('url')
        if url is None:
            url = best_size(attachment['sizes'], ['s','m','x','o','p','q','r','y','z','w'])['url']
        info['url'] = url

    elif kind == 'doc':
        url = attachment.get('url')
        if url is None:
            # No direct link: fall back to the largest preview image.
            url = best_size(attachment['preview']['photo']['sizes'], ['s','m','x','y','z','o'])['src']
        info['url'] = url
        info['title'] = attachment.get('title')
        info['ext'] = attachment.get('ext')

    else:
        # Videos carry no URL here; only the title is passed on.
        info['title'] = attachment.get('title')

    info['type'] = kind
    return info

def get_attachments(self):
    """Return download descriptions for the current message's photos, docs and videos.

    The full message is fetched first, its attachments (including those of
    forwarded messages) are gathered, and every photo, doc and video among
    them is converted with get_attachment().
    """
    message = self.get_full_message(self.__event['message'], self.__vk_main)
    attachments = self.get_message_attachments(message)
    supported = self.filter_attachments(attachments, ['photo', 'doc', 'video'])
    return [self.get_attachment(attachment) for attachment in supported]

def get_command(self):
    """Return the text of the message carried by the current event."""
    message = self.__event['message']
    return message['text']

def get_message_id(self):
    """Return the 'id' field of the message carried by the current event."""
    message = self.__event['message']
    return message['id']

def get_peer_id(self):
    """Return the 'peer_id' field of the message carried by the current event."""
    message = self.__event['message']
    return message['peer_id']

def send_message(self, message = '', attachment = None, ):
peer_id = self.get_dialog_id()
def get_from_id(self):
    """Return the 'from_id' field of the message carried by the current event."""
    message = self.__event['message']
    return message['from_id']

def get_id(self):
    """Return the 'id' field of the message carried by the current event."""
    message = self.__event['message']
    return message['id']

def get_date(self):
    """Return the 'date' field of the message carried by the current event."""
    message = self.__event['message']
    return message['date']

def send_message(self, message = '', attachments = None, reply_to = None):
    """Send ``message`` to the peer of the current event.

    Args:
        message: text to send.
        attachments: optional list of local file paths (a single path
            string is accepted too). Each file is uploaded as a document
            and sent in its own message, so ``message`` is repeated once
            per file.
        reply_to: optional id of the message to reply to.

    Uploaded documents are titled
    ``<from_id>_<message id>_<message date>[_<n>].zip``; the ``_<n>`` counter
    (starting at 1) is only added when more than one file is sent.
    """
    peer_id = self.get_peer_id()

    def send(attachment_ref = None):
        # NOTE(review): the peer id is passed as 'user_id' - confirm this is
        # what messages.send expects for multi-user chats.
        params = {
            'user_id': peer_id,
            'message': message,
            'random_id': randint(0, 99999999999),
            'reply_to': reply_to
        }
        if attachment_ref is not None:
            params['attachment'] = attachment_ref
        self.__vk_main.method('messages.send', params)

    # A bare string would otherwise be iterated character by character.
    if isinstance(attachments, str):
        attachments = [attachments]

    if not attachments:
        send()
        return

    title_base = '{}_{}_{:%Y-%m-%d_%H:%M:%S}'.format(
        self.get_from_id(),
        self.get_id(),
        datetime.fromtimestamp(self.get_date()),
    )
    numbered = len(attachments) > 1

    for index, file_path in enumerate(attachments, start = 1):
        if numbered:
            attachment_title = '{}_{}.zip'.format(title_base, index)
        else:
            attachment_title = '{}.zip'.format(title_base)

        uploaded = self.__uploader.document_message(
            file_path,
            peer_id = peer_id,
            title = attachment_title
        )
        print('attachment', uploaded)
        send('doc{owner_id}_{id}'.format(**uploaded['doc']))