diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fc3d7ac --- /dev/null +++ b/.gitignore @@ -0,0 +1,19 @@ +*.pyc + +# Configuration +conf*.yml +conf*.yaml + +# Logging +*.log +log.log.* + +#virtualenv +/bin/ +/include/ +/lib/ +/pip-selfcheck.json +/share/ +instabotenv/ +db_data/ + diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..0c4c0ce --- /dev/null +++ b/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.6 +MAINTAINER Pyotr Ermishkin + +COPY instabot /instabot/ +COPY docker-entrypoint.sh / +COPY instabot_runner.py / +COPY requirements.txt / + +VOLUME /configuration + +RUN pip install -r requirements.txt + +CMD ["/docker-entrypoint.sh"] diff --git a/InstaBot.py b/InstaBot.py deleted file mode 100644 index 25941c1..0000000 --- a/InstaBot.py +++ /dev/null @@ -1,109 +0,0 @@ -import mechanize, yaml, re, time, sys, pycurl, hmac, urllib -from hashlib import sha256 - -WEBSTA_URL = "http://websta.me/" -WEBSTA_HASHTAG = WEBSTA_URL + "hot" - -INSTAGRAM_API = "https://api.instagram.com/v1/media/" -USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11' - -# Function to encode the string with the IP and ID of the picture then like it -def encodeAndRequest(id): - c = pycurl.Curl() - signature = hmac.new(str(profile['CREDENTIALS']['CLIENT_SECRET']), profile['IP'], sha256).hexdigest() - header = '|'.join([profile['IP'], signature]) - header = ["X-Insta-Forwarded-For: " + header] - - url = INSTAGRAM_API + id + "/likes" - c.setopt(c.URL, url) - c.setopt(c.POSTFIELDS, "access_token=" + str(profile['CREDENTIALS']['ACCESS_TOKEN'])) - c.setopt(pycurl.HTTPHEADER, header) - c.perform() - - response = str(c.getinfo(c.HTTP_CODE)) - c.close() - - return response - -# Function to parse the Top HashTag page and get the current top hashtags -def getTopHashTags(br): - br.open(WEBSTA_HASHTAG) - topHashtags = 
re.findall('\"\>#(.*)\<\/a\>\<\/strong\>', br.response().read()) - return topHashtags - -# Function to read the hashtags from a users file if not wanting to parse the top 100 -def getHashtagsFromFile(): - #your list of hashtags - hashtags = [] - filename = 'hashtags.txt' - #Hashtag file input - f = open(filename) - #strips newline character - hashtags = [unicode(line.strip(), 'utf-8') for line in open(filename)] - f.close() - return hashtags - -# Function to like hashtages -def like(br, hashtags): - likes = 0 - - for hashtag in hashtags: - hashtaglikes = 0 - media_id = [] - response = br.open(WEBSTA_URL +"tag/" + urllib.quote(hashtag.encode('utf-8'))) - print u"Liking #%s" % hashtag - media_id = re.findall("span class=\"like_count_(.*)\"", response.read()) - - for id in media_id: - - if profile['MAXLIKES'] == "NO_MAX": - pass - elif likes >= int(profile['MAXLIKES']): - print "You have reached MAX_LIKES(" + str(profile['MAXLIKES']) + ")" - print u"This # is currently %s" % hashtag - sys.exit() - break - - if profile['PERHASHTAG'] == "NO_MAX": - pass - elif hashtaglikes >= int(profile['PERHASHTAG']): - print "REACHED MAX_LIKES PER HASHTAG" - print "MOVING ONTO NEXT HASHTAG" - hashtaglikes = 0 - break - - response = encodeAndRequest(id) - - if bool(re.search("200", response)): - print " YOU LIKED " + str(id) - likes += 1 - hashtaglikes += 1 - time.sleep(profile['SLEEPTIME']) - else: - print "SOMETHING WENT WRONG" - print response - print "SLEEPING FOR 60 seconds" - print "CURRENTLY LIKED " + str(likes) + " photos" - time.sleep(60) - - print "YOU LIKED " + str(likes) + " photos" - -if __name__ == "__main__": - - print "=================================" - print " InstaBot " - print " Developed by Marc Laventure " - print "=================================" - print "" - - profile = yaml.safe_load(open("profile.yml", "r")) - br = mechanize.Browser() - br.set_handle_robots(False) - br.set_handle_equiv(False) - br.addheaders = [('User-Agent', USER_AGENT), ('Accept', 
'*/*')] - - if profile['TOP'] == 1: - hashtags = getTopHashTags(br) - else: - hashtags = getHashtagsFromFile() - like(br, hashtags) diff --git a/PycURL Download.md b/PycURL Download.md deleted file mode 100644 index e210e72..0000000 --- a/PycURL Download.md +++ /dev/null @@ -1,9 +0,0 @@ -[Download PycURL](http://pycurl.sourceforge.net/) - -Run - -``` -tar -xvf pycurl-7.19.5.tar.gz -cd pycurl-7.19.5/ -sudo python setup.py install -``` diff --git a/README.md b/README.md index b1c0831..41fdc6d 100644 --- a/README.md +++ b/README.md @@ -1,61 +1,97 @@ -InstaBot -======== +# InstaBot -NOTE BIG UPDATE ON FUCTIONALITY; PLEASE UPDATE YOUR WORKING COPY AND FOLLOW NEW INSTRUCTIONS! +Instagram bot written in Python 3 that cycles through specified hashtags and automatically likes pictures with those hashtags to get more followers. The bot also follows people and unfollows them after specified period of time. Unfollowed people are saved in DB to prevent following them again. To find new people to follow it uses list of followers of people you have followed. -A simple Instagram bot that pulls trending top 100 hashtags and auto likes pictures with those hashtags to get more followers. +During installation process it saves people followed by you as "followed long time ago" and unfollows them at the first start. -Developed in Python and built with the mechanize library +The bot doesn't use Instagram API so all credentials you need are your login and password. -STILL IN DEVELOPMENT, CONTRIBUTIONS ARE WELCOME +## Deployment -##Requirements - -1. Python is installed (Tested with version 2.6.8) -2. mechanize library is installed [Mechanize download!](http://wwwsearch.sourceforge.net/mechanize/download.html) V0.2.5 -3. PyYAML libray is installed [PyYAML download!](pyyaml.org/wiki/PyYAML) V3.11 -4. Authenticated your instagram account on [websta.me](http://websta.me/) -5. 
PycURL library installed [PycURL download!](http://pycurl.sourceforge.net/) V7.19.5 -6. Registered a client for your account on [instagram](http://instagram.com/developer/clients/manage/) - -##Setup -Clone this repository: -``` -git clone https://github.com/marclave/InstaBot.git +```sh +docker network create \ + --subnet=172.21.0.0/24 \ + instabot +docker run \ + --name=instabot-mysql \ + --net=instabot \ + --ip=172.21.0.2 \ + --env="MYSQL_ROOT_PASSWORD=ZEbMKcFQppk8m8PR3b" \ + --env="MYSQL_DATABASE=instabot" \ + --env="MYSQL_USER=instabot" \ + --env="MYSQL_PASSWORD=KbWj0Eua78YGLNLf3K" \ + --volume=`pwd`/lib:/var/lib/mysql \ + --detach \ + mysql:5.7 +docker build --tag=instabot . ``` -Follow install instructions for PycURL: [instructions](PycURL Download.md) -Go to [instagram clients](http://instagram.com/developer/clients/manage/) -Register your account for a developers client -Retrieve your CLIENT SECRET and USER ID token under "Manage Clients" -To retrieve your access token, go to [instagram api console](http://instagram.com/developer/api-console/) -Run a query involving your USER ID and grab your access token from the request +Create MySQL DB: -Note: Ensure likes are part of the access scope [enable likes scope](https://instagram.com/oauth/authorize/?client_id=INSERT_CLIENTID&redirect_uri=INSERT_REDIRECTURI&response_type=code&scope=likes+basic) +```sql +CREATE DATABASE IF NOT EXISTS instagram CHARACTER SET utf8 COLLATE utf8_general_ci; +CREATE USER instabot@localhost IDENTIFIED BY 'GT8H!b]5,9}A7'; +GRANT ALL ON instagram.* TO instabot@localhost; +``` +Create `configuration.yml` file containing your credentials, e.g.: -Modify the profile to include your information, example: -``` -CREDENTIALS: - ACCESS_TOKEN: "USER_ACCESS_TOKEN" - CLIENT_SECRET: 
"USER_CLIENT_SECRET" -MAXLIKES: 1000 <- If you dont want a max, input NO_MAX -PERHASHTAG: 10 <- If you dont want a max, input NO_MAX -TOP: 1 <- To use the top hashtags on Websta.me use a 1 -IP: "USER_IP_ADDRESS" <- run ipconfig or ifconfig to grab your ip address +```yaml +credentials: + username: "your_username" + password: "eKeFB2;AW6fS}z" +db: + host: "172.21.0.2" + name: "instabot" + user: "instabot" + password: "KbWj0Eua78YGLNLf3K" +following_hours: 120 +hashtags: + - I + - люблю + - Python +instagram: + limit_sleep_time_coefficient: 1.3 + limit_sleep_time_min: 30 + success_sleep_time_coefficient: 0.5 + success_sleep_time_max: 6 + success_sleep_time_min: 4 +logging: + version: 1 + formatters: + simple: + class: logging.Formatter + format: "%(asctime)s - %(levelname)s - %(name)s - %(message)s" + handlers: + console: + class: logging.StreamHandler + level: DEBUG + formatter: simple + loggers: + instabot: + level: DEBUG + root: + level: DEBUG + handlers: + - console +users_to_follow_cache_size: 300 ``` -Note: If you do not put a 1 in the value of TOP then the program will look for a text file -called hashtags.txt. -The format for this file is to have each hashtag seperated by line, example: +Where: -``` -I -Love -Python -``` +* `following_hours` — how long users will stay followed. +* `hashtags` — list of hashtags to get photos to like. Optional. By default bot won't like anything. +* `logging` — logging setup as described in [this howto](https://docs.python.org/3/howto/logging.html). +* `users_to_follow_cache_size` — how much users should be fetched for following. The cache is being filled in once a minute. Optional. By default bot won't follow anybody. 
class Configuration:
    """Bot settings loaded from a YAML configuration file.

    Every problem with the file (unreadable, malformed, incomplete, wrong
    value types) terminates the process through ``sys.exit`` with a
    human-readable message instead of a traceback.

    Args:
        filename (str): Path to the ``configuration.yml`` file.

    """

    def __init__(self, filename):
        try:
            # YAML streams are UTF-8 by specification. Relying on the locale
            # encoding breaks non-ASCII hashtags under an ASCII locale.
            with open(filename, 'r', encoding='utf-8') as f:
                configuration = yaml.safe_load(f)
        except (IOError, OSError, ValueError) as e:
            sys.exit('Can\'t obtain configuration: {}'.format(e))
        except yaml.YAMLError as e:
            # PyYAML syntax errors do not derive from ValueError.
            sys.exit('Can\'t obtain configuration: {}'.format(e))
        try:
            self.db_host = configuration['db']['host']
            self.db_name = configuration['db']['name']
            self.db_user = configuration['db']['user']
            self.db_password = configuration['db']['password']
            self.following_hours = configuration['following_hours']
            self.instagram_limit_sleep_time_coefficient = \
                configuration['instagram']['limit_sleep_time_coefficient']
            self.instagram_limit_sleep_time_min = \
                configuration['instagram']['limit_sleep_time_min']
            self.instagram_success_sleep_time_coefficient = \
                configuration['instagram']['success_sleep_time_coefficient']
            self.instagram_success_sleep_time_max = \
                configuration['instagram']['success_sleep_time_max']
            self.instagram_success_sleep_time_min = \
                configuration['instagram']['success_sleep_time_min']
            self.instagram_username = configuration['credentials']['username']
            self.instagram_password = configuration['credentials']['password']
            self.logging = configuration['logging']
        except (KeyError, TypeError) as e:
            # TypeError covers an empty file (None) or a section that is not
            # a mapping.
            sys.exit(
                'Configuration is not fully specified. {} is missed.'
                .format(e),
            )
        # Optional settings. A key that is present but empty (`hashtags:`)
        # loads as None, so fall back to the same defaults as a missing key.
        self.hashtags = configuration.get('hashtags') or []
        self.users_to_follow_cache_size = \
            configuration.get('users_to_follow_cache_size') or 0
        try:
            self.following_hours = int(self.following_hours)
            self.users_to_follow_cache_size = \
                int(self.users_to_follow_cache_size)
        except (TypeError, ValueError) as e:
            # int(None) and int([]) raise TypeError rather than ValueError.
            sys.exit('Some integer value is specified wrong: {}'.format(e))
def get_db(configuration):
    """Create the MySQL database handle and bind the user model to it.

    A throw-away connection is opened first so that wrong credentials or an
    unreachable host are reported immediately. On such a failure the process
    is terminated through ``sys.exit``; no exception reaches the caller.

    Args:
        configuration: Object exposing ``db_name``, ``db_host``, ``db_user``
            and ``db_password``.

    Returns:
        The (closed) ``RetryingMySQLDatabase`` instance that
        ``user.database_proxy`` was initialized with.

    """
    connection_settings = {
        'host': configuration.db_host,
        'user': configuration.db_user,
        'password': configuration.db_password,
    }
    database = RetryingMySQLDatabase(
        configuration.db_name,
        **connection_settings
    )
    # Connect only to validate the settings; the connection is not kept.
    try:
        database.connect()
    except DatabaseError as error:
        sys.exit(
            'DatabaseError during connecting to database: {0}'.format(error),
        )
    database.close()
    user.database_proxy.initialize(database)
    return database
async def _follow(self):
    """Follow every stored user that has never been followed.

    Candidates are processed ordered by ``following_depth`` and then by
    ``created``. A user Instagram refuses to let us follow is stored as
    "followed long ago and already unfollowed", so the user is not picked
    again.

    Raises:
        APIError
        APIJSONError
        APILimitError

    """
    already_unfollowed_at = datetime.datetime.utcnow() - \
        self._following_timedelta
    never_followed = User.select().where(
        # `== None` is a peewee expression (IS NULL), not a Python test.
        User.was_followed_at == None,
    ).order_by(User.following_depth, User.created)
    for candidate in never_followed:
        try:
            await self._client.follow(candidate)
        except (APINotAllowedError, APINotFoundError) as error:
            LOGGER.debug(
                'Can\'t follow {}. {}'.format(candidate.username, error),
            )
            # Pretend the follow/unfollow cycle has already completed.
            followed = False
            followed_at = already_unfollowed_at
        else:
            followed = True
            followed_at = datetime.datetime.utcnow()
            self._stats_service.increment('followed')
        candidate.is_followed = followed
        candidate.was_followed_at = followed_at
        candidate.save()
def install(configuration, db):
    """Prepare the database and schedule unfollowing of current followees.

    The function is idempotent: the Docker entrypoint runs ``install`` on
    every container start, so the table and the row for the bot's own
    account are only created when they do not exist yet.

    Args:
        configuration (Configuration): Loaded bot configuration.
        db: Database handle returned by ``get_db``.

    """
    LOGGER.info('Installing InstaBot')
    # safe=True issues CREATE TABLE IF NOT EXISTS, so repeated runs are fine.
    db.create_tables([User], safe=True)
    client = instagram.Client(configuration)
    was_followed_at = datetime.datetime.utcnow() - \
        datetime.timedelta(hours=configuration.following_hours)
    # The bot's own account is stored as "followed long ago" to prevent
    # attempts to follow the user by himself. get_or_create keeps a single
    # row across repeated installs.
    User.get_or_create(
        instagram_id=client.id,
        defaults={
            'following_depth': 0,
            'username': configuration.instagram_username,
            'was_followed_at': was_followed_at,
        },
    )

    unfollow(configuration)
class Client:
    """Instagram web client that imitates a logged-in browser session.

    The constructor logs in synchronously by running ``_do_login`` on the
    current event loop. After every request the client sleeps: a short,
    gradually shrinking pause after ordinary responses and a growing pause
    when Instagram reports that the account is temporarily blocked.

    Args:
        configuration (Configuration): Supplies credentials and sleep
            timings.

    """

    # Instagram embeds page data as `window._sharedData = {...};</script>`.
    # Group 1 captures the JSON object.
    _SHARED_DATA_RE = re.compile(
        r'window\._sharedData\s*=\s*(\{.+?\});\s*</script>',
        re.DOTALL,
    )

    def __init__(self, configuration):
        self._limit_sleep_time_coefficient = configuration \
            .instagram_limit_sleep_time_coefficient
        self._limit_sleep_time_min = configuration \
            .instagram_limit_sleep_time_min
        self._success_sleep_time_coefficient = configuration \
            .instagram_success_sleep_time_coefficient
        self._success_sleep_time_max = configuration \
            .instagram_success_sleep_time_max
        self._success_sleep_time_min = configuration \
            .instagram_success_sleep_time_min
        self._limit_sleep_time = self._limit_sleep_time_min
        self._success_sleep_time = self._success_sleep_time_max
        self._username = configuration.instagram_username
        self._password = configuration.instagram_password
        self._referer = BASE_URL
        self._session = self._create_session()
        loop = asyncio.get_event_loop()
        loop.run_until_complete(self._do_login())

    @staticmethod
    def _create_session():
        """Returns a fresh HTTP session with browser-like cookies/headers."""
        return ClientSession(
            cookies={
                'ig_pr': '1',
                'ig_vw': '1920',
            },
            headers={
                'User-Agent': USER_AGENT,
                'X-Instagram-AJAX': '1',
                'X-Requested-With': 'XMLHttpRequest',
            },
        )

    async def _ajax(self, url, data=None, referer=None):
        """Simulates AJAX request.

        Args:
            url (str): URL path. e.g.: 'query/'
            data (dict, optional)
            referer (str, optional): Last visited URL.

        Returns:
            dict: Decoded JSON response whose `status` is 'ok'.

        Raises:
            APIError
            APIFailError
            APILimitError
            APINotAllowedError
            APINotFoundError

        """
        if referer is not None:
            self._referer = referer
        url = f'{BASE_URL}{url}'
        headers = {
            'Referer': self._referer,
            'X-CSRFToken': self._csrf_token,
        }
        async with self._session.post(
            url,
            data=data,
            headers=headers,
        ) as response:
            if response.status == HTTPStatus.NOT_FOUND:
                response.close()
                await self._sleep_success()
                raise APINotFoundError(
                    f'AJAX response status code is 404 for {url}',
                )
            elif HTTPStatus.INTERNAL_SERVER_ERROR <= response.status:
                response.close()
                await self._sleep_success()
                raise APIError(response.status)
            text = await response.text()
            try:
                response_dict = json.loads(text)
            except ValueError as err:
                reason = (
                    f'AJAX request to {url} is not JSON: {err} '
                    f'Response ({response.status}): "{text}"'
                )
                await self._sleep_success()
                # A non-JSON 400 is how Instagram refuses an action.
                if response.status == HTTPStatus.BAD_REQUEST:
                    raise APINotAllowedError(reason) from err
                raise APIError(reason) from err
        if not isinstance(response_dict, dict):
            # Valid JSON, but not the object we can read `status` from.
            raise APIError(
                f'AJAX request to {url} is not OK: {response_dict}',
            )
        status = response_dict.get('status')
        if status == 'fail':
            message = response_dict.get('message')
            if isinstance(message, str) and 'temporarily blocked' in message:
                await self._sleep_limit()
                raise APILimitError(
                    f'AJAX request to {url} was blocked: {response_dict}',
                )
            raise APIFailError(
                f'AJAX request to {url} was failed: {response_dict}',
            )
        elif status != 'ok':
            raise APIError(f'AJAX request to {url} is not OK: {response_dict}')
        LOGGER.debug(f'Request: {url} Response: {response_dict}')
        await self._sleep_success()
        return response_dict

    async def _do_login(self):
        """Logins client session and stores own user ID in `self.id`.

        Raises:
            APIError
            APIFailError
            APILimitError
            APINotAllowedError
            APINotFoundError

        """
        await self._open(BASE_URL)
        self._update_csrf_token()
        await self._ajax(
            'accounts/login/ajax/',
            data={
                'username': self._username,
                'password': self._password,
            },
        )
        self._update_csrf_token()
        try:
            self.id = self._session.cookies['ds_user_id'].value
        except KeyError as err:
            reason = 'Can\'t obtain user ID from cookies.'
            LOGGER.exception(reason)
            raise APIError(reason) from err

    async def follow(self, user):
        """Follows certain user.

        Args:
            user (User): Whom to follow.

        Raises:
            APIError
            APILimitError
            APINotAllowedError
            APINotFoundError

        """
        try:
            await self._ajax(
                'web/friendships/{}/follow/'.format(user.instagram_id),
                referer=user.get_url(),
            )
        except APILimitError as e:
            raise APILimitError(
                'API limit was reached during following {}. {}'
                .format(user.username, e),
            ) from e
        except APIError as e:
            raise APIError(
                'API troubles during following {}. {}'
                .format(user.username, e),
            ) from e
        else:
            LOGGER.debug('{} was followed'.format(user.username))

    @staticmethod
    def _follows_query(user, selector):
        """Builds the `query/` payload for one page of followed users."""
        return {
            'q': 'ig_user({id}) {{ follows.{selector} {{ count,'
                 ' page_info {{ end_cursor, has_next_page }},'
                 ' nodes {{ id, is_verified,'
                 ' followed_by_viewer, requested_by_viewer,'
                 ' full_name, profile_pic_url,'
                 ' username }} }}}}'
                 .format(id=user.instagram_id, selector=selector),
            'ref': 'relationships::follow_list',
        }

    async def get_followed(self, user):
        """Fetches information about people followed by given user.

        Args:
            user (User): Whose subscriptions should be fetched.

        Returns:
            List of dicts containing following fields:
            {
                'id': '123',
                'username': 'foobar',
            }

        Raises:
            APIError
            APIFailError
            APILimitError
            APINotAllowedError
            APINotFoundError

        """
        single_response_size = 50

        response = await self._ajax(
            'query/',
            self._follows_query(
                user,
                'first({})'.format(single_response_size),
            ),
            referer=user.get_url(),
        )
        followed = response['follows']['nodes']
        while response['follows']['page_info']['has_next_page']:
            end_cursor = response['follows']['page_info']['end_cursor']
            response = await self._ajax(
                'query/',
                self._follows_query(
                    user,
                    'after({}, {})'.format(end_cursor, single_response_size),
                ),
                referer=user.get_url(),
            )
            followed.extend(response['follows']['nodes'])
        LOGGER.debug('{} followed users were fetched'.format(len(followed)))
        return followed

    async def _get_followers_page(self, user, cursor=None):
        """Fetches one page (20 entries) of followers of given user.

        Args:
            user (User): User whose followers should be fetched.
            cursor (str, optional): `end_cursor` of the previous page.
                The first page is fetched when omitted.

        Returns:
            Tuple `(followers, end_cursor, has_next_page)`.

        Raises:
            APIError
            APIFailError
            APILimitError
            APINotAllowedError: Also when the response lacks expected
                fields.
            APINotFoundError

        """
        cursor = 'first(20)' if cursor is None else \
            'after({}, 20)'.format(cursor)
        query = '''ig_user({user_instagram_id}) {{
            followed_by.{cursor} {{
                count,
                page_info {{
                    end_cursor,
                    has_next_page
                }},
                nodes {{
                    id,
                    is_verified,
                    followed_by {{count}},
                    follows {{count}},
                    followed_by_viewer,
                    follows_viewer,
                    requested_by_viewer,
                    full_name,
                    profile_pic_url,
                    username
                }}
            }}
        }}''' \
            .format(user_instagram_id=user.instagram_id, cursor=cursor)
        data = {'q': query, 'ref': 'relationships::follow_list'}
        response = await self._ajax('query/', data, referer=user.get_url())
        try:
            followers = response['followed_by']['nodes']
            page_info = response['followed_by']['page_info']
            end_cursor = page_info['end_cursor']
            has_next_page = page_info['has_next_page']
        except (KeyError, TypeError) as e:
            raise APINotAllowedError(
                'Instagram have given unexpected data in '
                '`_get_followers_page`. Response JSON: {response} '
                'Error: {error}'.format(
                    response=response,
                    error=e,
                )
            ) from e
        return followers, end_cursor, has_next_page

    async def get_media_by_hashtag(self, hashtag):
        """Fetches some media about specified hashtag.

        Args:
            hashtag (str): Hashtag to fetch

        Returns:
            List of media IDs (strings)

        Raises:
            APIError: The page has no parsable shared-data JSON or the JSON
                lacks the expected fields.
            IOError
            OSError
            ClientResponseError

        """
        url = '{}explore/tags/{}/'.format(
            BASE_URL,
            urllib.parse.quote(hashtag.encode('utf-8')),
        )
        async with self._session.get(url) as response:
            page = await response.read()
        page = page.decode('utf-8', errors='ignore')
        match = self._SHARED_DATA_RE.search(page)
        if match is None:
            raise APIError(
                'Can\'t find JSON in the response: {}'.format(page),
            )
        try:
            shared_data = json.loads(match.group(1))
        except ValueError as e:
            raise APIError(
                'Can\'t parse response JSON: {}'.format(e),
            ) from e
        try:
            tag = shared_data['entry_data']['TagPage'][0]['graphql']['hashtag']
            edges = tag['edge_hashtag_to_media']['edges']
            media = [edge['node']['id'] for edge in edges]
        except (IndexError, KeyError, TypeError) as e:
            # IndexError: `TagPage` may be an empty list.
            raise APIError(
                'Can\'t obtain media from response JSON: {}'.format(e),
            ) from e
        LOGGER.debug(
            '{} media about "{}" were fetched'.format(len(media), hashtag),
        )
        return media

    async def get_some_followers(self, user):
        """Fetches some amount of followers of given user.

        At most three pages are requested, pausing five seconds after each.

        Args:
            user (User): Whose followers should be fetched.

        Returns:
            List of dicts containing following fields:
            {
                'id': '123',
                'username': 'foobar',
            }

        Raises:
            APIError
            APIFailError
            APILimitError
            APINotAllowedError
            APINotFoundError

        """
        pages_to_fetch = 3
        followers = []
        get_next = True
        cursor = None  # Eventually we will check if we have a
        # cached page and use that.
        LOGGER.debug('Fetching followers of {}'.format(user.username))
        while get_next and pages_to_fetch > 0:
            next_followers, cursor, get_next = await self._get_followers_page(
                user=user,
                cursor=cursor,
            )
            followers.extend(next_followers)
            pages_to_fetch -= 1
            await asyncio.sleep(5)
            # TODO: Cache cursor for continuation of this, if needed.
        LOGGER.debug('Fetched {} followers of {}'
                     .format(len(followers), user.username))
        return followers

    async def like(self, media):
        """Likes certain media.

        Args:
            media (str): Media ID.

        Raises:
            APIError
            APIFailError
            APILimitError
            APINotAllowedError
            APINotFoundError

        """
        try:
            await self._ajax('web/likes/{}/like/'.format(media))
        except APILimitError as e:
            raise APILimitError(
                'API limit was reached during liking {}. {}'.format(media, e),
            ) from e
        else:
            LOGGER.debug('Liked {}'.format(media))

    async def _open(self, url):
        """Opens given URL (HTTP GET) and remembers it as the referer.

        Args:
            url (str)

        Returns:
            str: Response.

        """
        headers = {
            'Referer': self._referer,
        }
        async with self._session.get(url, headers=headers) as response:
            self._referer = url
            return await response.text()

    async def relogin(self):
        """Drops the current session and logs in again with a new one.

        A closed session can't issue requests, so it is replaced rather
        than reused.

        Raises:
            APIError
            APIFailError
            APILimitError
            APINotAllowedError
            APINotFoundError

        """
        await self._session.close()
        self._session = self._create_session()
        await self._do_login()

    async def _sleep_limit(self):
        """Sleeps after an API block; each next such pause gets longer."""
        LOGGER.debug(
            'Sleeping for {:.0f} sec because of API limits'
            .format(self._limit_sleep_time),
        )
        await asyncio.sleep(self._limit_sleep_time)
        self._limit_sleep_time *= self._limit_sleep_time_coefficient

    async def _sleep_success(self):
        """Sleeps after an ordinary response.

        The first success after a block resets both timers. Every success
        then moves the pause from the maximum towards the minimum by the
        configured coefficient.

        """
        if self._limit_sleep_time != self._limit_sleep_time_min:
            self._limit_sleep_time = self._limit_sleep_time_min
            self._success_sleep_time = self._success_sleep_time_max
        await asyncio.sleep(self._success_sleep_time)
        self._success_sleep_time = self._success_sleep_time_min + \
            (self._success_sleep_time - self._success_sleep_time_min) * \
            self._success_sleep_time_coefficient

    async def unfollow(self, user):
        """Unfollows certain user.

        Args:
            user (User): Whom to unfollow.

        Raises:
            APIError
            APIFailError
            APILimitError
            APINotAllowedError
            APINotFoundError

        """
        try:
            await self._ajax(
                'web/friendships/{}/unfollow/'.format(user.instagram_id),
                referer=user.get_url(),
            )
        except APILimitError as e:
            raise APILimitError(
                'API limit was reached during unfollowing {}. {}'
                .format(user.username, e),
            ) from e
        except APIFailError as e:
            raise APIFailError(
                'API troubles during unfollowing {}. {}'
                .format(user.username, e),
            ) from e
        except APIError as e:
            raise APIError(
                'API troubles during unfollowing {}. {}'
                .format(user.username, e),
            ) from e
        else:
            LOGGER.debug('{} was unfollowed'.format(user.username))

    def _update_csrf_token(self):
        """Reads the CSRF token from session cookies.

        Raises:
            APIError: Instagram did not set the `csrftoken` cookie.

        """
        try:
            self._csrf_token = self._session.cookies['csrftoken'].value
        except KeyError as err:
            raise APIError('Can\'t obtain CSRF token from cookies.') from err
        LOGGER.debug('CSRF token is %s', self._csrf_token)
{}'.format(media, str(e))) + media = await self._media_service.pop() + except (IOError, OSError, ClientResponseError) as e: + LOGGER.warning(e) + await asyncio.sleep(5) + else: + media = await self._media_service.pop() + self._stats_service.increment('liked') diff --git a/instabot/media_service.py b/instabot/media_service.py new file mode 100644 index 0000000..fe92b07 --- /dev/null +++ b/instabot/media_service.py @@ -0,0 +1,38 @@ +import asyncio +import itertools +import logging +from .errors import APIError, ConfigurationError +from aiohttp.errors import ClientResponseError + +LOGGER = logging.getLogger('instabot.media_service') +MEDIA_COUNT_MIN = 100 + + +class MediaService: + def __init__(self, client, configuration): + self._hashtags = configuration.hashtags + if len(self._hashtags) == 0: + raise ConfigurationError('No hashtags were specified') + self._media = [] + self._client = client + + async def run(self): + for hashtag in itertools.cycle(self._hashtags): + try: + media = await self._client.get_media_by_hashtag(hashtag) + except (APIError, ClientResponseError, IOError, OSError) as e: + LOGGER.warning(e) + await asyncio.sleep(5) + else: + self._media.extend(media) + await asyncio.sleep(3) + while len(self._media) >= MEDIA_COUNT_MIN: + await asyncio.sleep(30) + + async def pop(self): + while True: + try: + return self._media.pop(0) + except IndexError: + LOGGER.debug('Has no media to pop') + await asyncio.sleep(5) diff --git a/instabot/stats_service.py b/instabot/stats_service.py new file mode 100644 index 0000000..2a3d436 --- /dev/null +++ b/instabot/stats_service.py @@ -0,0 +1,50 @@ +import asyncio +import logging + +LOGGER = logging.getLogger('instabot.stats_service') + + +class Counter: + def __init__(self): + self._counter = {} + + def clear(self): + self._counter.clear() + + def increment(self, key): + value = self._counter.get(key, 0) + self._counter[key] = value + 1 + + def report(self, prefix): + LOGGER.info('{} {!s}'.format(prefix, self._counter)) 
+ + +class StatsService: + _instance = None + + def __init__(self): + self._hourly_counter = Counter() + self._daily_counter = Counter() + type(self)._instance = self + + @classmethod + def get_instance(cls): + return cls._instance + + async def run(self): + hour = 0 + while True: + await asyncio.sleep(60 * 60) + hour += 1 + if hour % 24 == 0: + self._daily_counter.report( + 'Daily stats #{:.0f}'.format(hour / 24), + ) + self._daily_counter.clear() + else: + self._hourly_counter.report('Hourly stats #{}'.format(hour)) + self._hourly_counter.clear() + + def increment(self, key): + self._hourly_counter.increment(key) + self._daily_counter.increment(key) diff --git a/instabot/user.py b/instabot/user.py new file mode 100644 index 0000000..681f736 --- /dev/null +++ b/instabot/user.py @@ -0,0 +1,24 @@ +import datetime +from peewee import * + +database_proxy = Proxy() + + +class User(Model): + created = DateTimeField(default=datetime.datetime.utcnow) + following_depth = IntegerField() + instagram_id = CharField(max_length=20, unique=True) + is_followed = BooleanField(default=False) + username = CharField(max_length=30) + was_followed_at = DateTimeField(null=True) + were_followers_fetched = BooleanField(default=False) + + class Meta: + database = database_proxy + indexes = ( + (('is_followed', 'was_followed_at'), False), + (('were_followers_fetched', 'following_depth', 'created'), False), + ) + + def get_url(self): + return 'https://www.instagram.com/{0}/'.format(self.username) diff --git a/instabot/user_service.py b/instabot/user_service.py new file mode 100644 index 0000000..1df87bd --- /dev/null +++ b/instabot/user_service.py @@ -0,0 +1,90 @@ +import asyncio +import logging +import peewee +from .errors import APIError, APIJSONError, APILimitError, \ + APINotAllowedError, ConfigurationError +from .stats_service import StatsService +from .user import User +from aiohttp.errors import ClientResponseError + +LOGGER = 
logging.getLogger('instabot.user_service') + + +class UserService: + def __init__(self, client, configuration): + self._client = client + self._stats_service = StatsService.get_instance() + self._users_to_follow_cache_size = configuration \ + .users_to_follow_cache_size + if self._users_to_follow_cache_size == 0: + raise ConfigurationError('Users to follow count was set to 0.') + + async def run(self): + while True: + try: + await self._ensure_enough_users() + except APILimitError as e: + LOGGER.debug('Instagram limits were reached. {}'.format(e)) + except (APIError, APIJSONError, APINotAllowedError) as e: + LOGGER.debug(e) + await asyncio.sleep(5) + except (IOError, OSError, ClientResponseError) as e: + LOGGER.warning(e) + await asyncio.sleep(5) + else: + await asyncio.sleep(60 * 5) + + async def _ensure_enough_users(self): + users_to_follow_count = User.select() \ + .where(User.was_followed_at == None) \ + .count() + LOGGER.debug('{} users to follow found'.format(users_to_follow_count)) + if users_to_follow_count < self._users_to_follow_cache_size: + last_users_to_follow_count = users_to_follow_count + for user in User.select() \ + .where(User.were_followers_fetched == False) \ + .order_by( + User.following_depth, + User.created, + ): + following_depth = user.following_depth + 1 + try: + followers_json = \ + await self._client.get_some_followers(user) + except APINotAllowedError as e: + LOGGER.debug( + 'Can\'t fetch followers of {}. 
{}'.format( + user.username, + e, + ), + ) + user.were_followers_fetched = True + user.save() + continue + user.were_followers_fetched = True + user.save() + LOGGER.debug( + '{} followers of {} were fetched'.format( + len(followers_json), + user.username, + ), + ) + for follower_json in followers_json: + try: + User.create( + instagram_id=follower_json['id'], + following_depth=following_depth, + username=follower_json['username'], + ) + except peewee.IntegrityError: + pass + else: + users_to_follow_count += 1 + self._stats_service \ + .increment('users_to_follow_fetched') + if users_to_follow_count >= self._users_to_follow_cache_size: + break + LOGGER.debug( + '%d users were saved in DB', + users_to_follow_count - last_users_to_follow_count, + ) diff --git a/instabot_runner.py b/instabot_runner.py new file mode 100755 index 0000000..f3a231c --- /dev/null +++ b/instabot_runner.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 + +from instabot.instabot import main + +if __name__ == '__main__': + main() diff --git a/profile.yml b/profile.yml deleted file mode 100644 index dd08fce..0000000 --- a/profile.yml +++ /dev/null @@ -1,8 +0,0 @@ -CREDENTIALS: - ACCESS_TOKEN: USER_ACCESS_TOKEN - CLIENT_SECRET: "USER_CLIENT_SECRET" -SLEEPTIME: 1 -MAXLIKES: 10 -PERHASHTAG: 2 -TOP: 1 -IP: "USER_IP_ADDRESS" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1e9533e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +asyncio>=3.4.3,<4.0 +aiohttp>=0.21.6,<0.22 +docopt>=0.6.2,<0.7 +PyYAML>=3.11,<4.0 +peewee>=2.7.4,<3.0 +pymysql>=0.6.7,<0.7