diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index beb3c5fcab..94d8145045 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -46,10 +46,10 @@ jobs: pip install poetry build poetry self add "poetry-dynamic-versioning[plugin]" - name: Build Pypi package - if: github.ref == 'refs/heads/stable' + if: github.ref == 'refs/heads/stable' || github.ref == 'refs/heads/dev' run: python -m build - name: Publish Pypi package - if: github.ref == 'refs/heads/stable' + if: github.ref == 'refs/heads/stable' || github.ref == 'refs/heads/dev' uses: pypa/gh-action-pypi-publish@release/v1.5 with: password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/README.md b/README.md index 9d62adbcdc..d7c54d8c0f 100644 --- a/README.md +++ b/README.md @@ -9,30 +9,32 @@ pip install bbot [](https://www.python.org) [](https://github.com/psf/black) [](https://github.com/blacklanternsecurity/bbot/blob/dev/LICENSE) [](https://github.com/blacklanternsecurity/bbot/actions?query=workflow%3A"tests") [](https://codecov.io/gh/blacklanternsecurity/bbot) - + -### **BBOT** is a **recursive**, **modular** OSINT framework inspired by Spiderfoot and written in Python. +### **BBOT** is a **recursive**, **modular** OSINT framework inspired by Spiderfoot. -Capable of executing the entire OSINT process in a single command, BBOT does subdomain enumeration, port scanning, web screenshots (with its `gowitness` module), vulnerability scanning (with `nuclei`), and much more. +BBOT can execute the entire OSINT process in a single command: subdomain enumeration, port scans, web screenshots (with `gowitness`), vulnerability scanning (with `nuclei`), and much more. BBOT has over **80 modules** and counting. -BBOT currently has over **70 modules** and counting. - -### [Subdomain Enumeration Face-off](https://blog.blacklanternsecurity.com/p/subdomain-enumeration-tool-face-off) +Read our [blog post](https://blog.blacklanternsecurity.com/p/subdomain-enumeration-tool-face-off) to find out why BBOT is the most thorough subdomain enumeration tool available.  -## Installation (pip) +## Installation ([pip](https://pypi.org/project/bbot/)) +Note: installing in a virtualenv (e.g. via `pipx`) is recommended. ~~~bash -# note: installing in a virtualenv is recommended +# stable version pip install bbot +# bleeding edge (dev branch) +pip install --pre bbot + bbot --help ~~~ Prerequisites: -- Linux (Windows including WSL is not supported) +- Linux (Windows, including WSL, is not supported) - Python 3.9 or newer -## [Installation (Docker)](https://hub.docker.com/r/blacklanternsecurity/bbot) +## Installation ([Docker](https://hub.docker.com/r/blacklanternsecurity/bbot)) ~~~bash # bleeding edge (dev) docker run -it blacklanternsecurity/bbot --help @@ -50,58 +52,71 @@ See also: [Release History](https://github.com/blacklanternsecurity/bbot/wiki/Re ## Scanning with BBOT -Note: the `httpx` module is recommended in most scans because it is [used by BBOT to visit webpages](https://github.com/blacklanternsecurity/bbot/wiki#note-on-the-httpx-module). 
- ### Examples ~~~bash -# list modules -bbot -l - -# subdomain enumeration -bbot --flags subdomain-enum --modules httpx --targets evilcorp.com +# subdomains +bbot -t evilcorp.com -f subdomain-enum -# passive modules only -bbot --flags passive --targets evilcorp.com +# subdomains (passive only) +bbot -t evilcorp.com -f subdomain-enum -rf passive -# web screenshots with gowitness -bbot -m naabu httpx gowitness --name my_scan --output-dir . -t subdomains.txt +# subdomains + port scan + web screenshots +bbot -t evilcorp.com -f subdomain-enum -m naabu gowitness -n my_scan -o . -# web scan -bbot -f web-basic -t www.evilcorp.com +# subdomains + basic web scan (wappalyzer, robots.txt, iis shortnames, etc.) +bbot -t evilcorp.com -f subdomain-enum web-basic -# web spider (search for emails, etc.) -bbot -m httpx -c web_spider_distance=2 web_spider_depth=2 -t www.evilcorp.com +# subdomains + web spider (search for emails, etc.) +bbot -t evilcorp.com -f subdomain-enum -c web_spider_distance=2 web_spider_depth=2 # everything at once because yes -bbot -f subdomain-enum email-enum cloud-enum web-basic -m naabu gowitness nuclei --allow-deadly -t evilcorp.com +# subdomains + emails + cloud + port scan + non-intrusive web + web screenshots + nuclei +bbot -t evilcorp.com -f subdomain-enum email-enum cloud-enum web-basic -m naabu gowitness nuclei --allow-deadly + +# list modules +bbot -l ~~~ ### Targets -In BBOT, targets are used to seed a scan. You can specify any number of targets, and if you require more granular control over scope, you can also use whitelists and blacklists. +Targets seed a scan with initial data. You can specify an unlimited number of targets, either directly on the command line or in files (or both!). Targets can be any of the following: + +- DNS_NAME (`evilcorp.com`) +- IP_ADDRESS (`1.2.3.4`) +- IP_RANGE (`1.2.3.0/24`) +- URL (`https://www.evilcorp.com`) +- EMAIL_ADDRESS (`bob@evilcorp.com`) + +For example, the following scan is totally valid: ~~~bash # multiple targets -bbot -t evilcorp.com evilcorp.co.uk 1.2.3.0/24 targets.txt +bbot -t evilcorp.com evilcorp.co.uk http://www.evilcorp.cn 1.2.3.0/24 other_targets.txt +~~~ +#### Whitelists / Blacklists + +BBOT's whitelist determines what's considered to be in-scope. By default, the whitelist is simply your target. But if you want more granular scope control, you can override it with `--whitelist` (or add a `--blacklist`). + +~~~bash # seed a scan with two domains, but only consider assets to be in scope if they are inside 1.2.3.0/24 -bbot -t evilcorp.com evilcorp.co.uk --whitelist 1.2.3.0/24 --blacklist test.evilcorp.com 1.2.3.4 +bbot -t evilcorp.com evilcorp.co.uk --whitelist 1.2.3.0/24 --blacklist test.evilcorp.com 1.2.3.4 blacklist.txt ~~~ -Visit the wiki for more [tips and tricks](https://github.com/blacklanternsecurity/bbot/wiki#tips-and-tricks), including details on how BBOT handles scope, and how to tweak it if you need to. +Visit the wiki for more [tips and tricks](https://github.com/blacklanternsecurity/bbot/wiki#tips-and-tricks). 
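To make the scope rules concrete, here is a minimal Python sketch of how a whitelist and blacklist can combine to decide scope. This is an illustration only, not BBOT's actual implementation: the `in_scope` helper and its matching rules (CIDR membership for IPs, parent-domain matching for DNS names) are assumptions made for the example.

~~~python
import ipaddress


def in_scope(host, whitelist, blacklist):
    # Hypothetical helper for illustration -- not BBOT's real scope code.
    # A host is in scope if it matches the whitelist (by default, your targets)
    # and does not match the blacklist; the blacklist always wins.
    def matches(host, entries):
        for entry in entries:
            try:
                # IP / CIDR entries: check network membership
                if ipaddress.ip_address(host) in ipaddress.ip_network(entry, strict=False):
                    return True
            except ValueError:
                # DNS entries: exact match or parent-domain match
                if host == entry or host.endswith("." + entry):
                    return True
        return False

    return matches(host, whitelist) and not matches(host, blacklist)


# Mirrors the scan above: --whitelist 1.2.3.0/24 --blacklist 1.2.3.4
print(in_scope("1.2.3.5", ["1.2.3.0/24"], ["1.2.3.4"]))   # True
print(in_scope("1.2.3.4", ["1.2.3.0/24"], ["1.2.3.4"]))   # False (blacklisted)
print(in_scope("test.evilcorp.com", ["1.2.3.0/24"], []))  # False (not in the whitelist)
~~~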
## Using BBOT as a Python library ~~~python from bbot.scanner import Scanner # any number of targets can be specified -scan = Scanner("evilcorp.com", "1.2.3.0/24", modules=["naabu"]) +scan = Scanner("evilcorp.com", "1.2.3.0/24", modules=["httpx", "sslcert"]) for event in scan.start(): - print(event) + print(event.json()) ~~~ # Output -BBOT can output to TXT, JSON, CSV, Neo4j, and more with `--output-module`. You can output to multiple formats simultaneously. +By default, BBOT saves its output in TXT, JSON, and CSV formats. To enable more output modules, you can use `--output-module`. ~~~bash # tee to a file bbot -f subdomain-enum -t evilcorp.com | tee evilcorp.txt @@ -109,10 +124,10 @@ bbot -f subdomain-enum -t evilcorp.com | tee evilcorp.txt # output to JSON bbot --output-module json -f subdomain-enum -t evilcorp.com | jq -# output to CSV, TXT, and JSON, in current directory -bbot -o . --output-module human csv json -f subdomain-enum -t evilcorp.com +# output asset inventory in current directory +bbot -o . --output-module asset_inventory -f subdomain-enum -t evilcorp.com ~~~ -For every scan, BBOT generates a unique and mildly-entertaining name like `fuzzy_gandalf`. Output for that scan, including the word cloud and any gowitness screenshots, etc., are saved to a folder by that name in `~/.bbot/scans`. The most recent 20 scans are kept, and older ones are removed. You can change the location of BBOT's output with `--output`, and you can also pick a custom scan name with `--name`. +For every scan, BBOT generates a unique and mildly-entertaining name like `demonic_jimmy`. Output for that scan, including the word cloud and any gowitness screenshots, etc., are saved to a folder by that name in `~/.bbot/scans`. The most recent 20 scans are kept, and older ones are removed. You can change the location of BBOT's output with `--output`, and you can also pick a custom scan name with `--name`. If you reuse a scan name, it will append to its original output files and leverage the previous word cloud. 
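As a quick illustration of consuming that output programmatically, here is a hedged sketch that shells out to BBOT and filters the JSON event stream. Based on the `| jq` example above, it assumes the `json` output module writes one JSON object per line to stdout, with `type` and `data` fields (as shown by `event.json()` in the library example); adjust to the real event schema as needed.

~~~python
import json
import subprocess

# Sketch only: assumes one JSON event per line on stdout, with "type"/"data"
# fields (as suggested by the `| jq` example above).
proc = subprocess.Popen(
    ["bbot", "--output-module", "json", "-f", "subdomain-enum", "-t", "evilcorp.com"],
    stdout=subprocess.PIPE,
    text=True,
)
for line in proc.stdout:
    try:
        event = json.loads(line)
    except json.JSONDecodeError:
        continue  # skip any non-JSON lines
    if event.get("type") == "DNS_NAME":
        print(event.get("data"))
proc.wait()
~~~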
@@ -127,16 +142,17 @@ docker run -p 7687:7687 -p 7474:7474 -v "$(pwd)/data/:/data/" --env NEO4J_AUTH=n ~~~ - After that, run bbot with `--output-modules neo4j` ~~~bash -bbot -f subdomain-enum -t evilcorp.com --output-modules human neo4j +bbot -f subdomain-enum -t evilcorp.com --output-modules neo4j ~~~ - Browse data at http://localhost:7474 # Usage ~~~ $ bbot --help -usage: bbot [-h] [--help-all] [-t TARGET [TARGET ...]] [-w WHITELIST [WHITELIST ...]] [-b BLACKLIST [BLACKLIST ...]] [--strict-scope] [-n SCAN_NAME] [-m MODULE [MODULE ...]] [-l] [-em MODULE [MODULE ...]] - [-f FLAG [FLAG ...]] [-rf FLAG [FLAG ...]] [-ef FLAG [FLAG ...]] [-om MODULE [MODULE ...]] [-o DIR] [-c [CONFIG ...]] [--allow-deadly] [-v] [-d] [-s] [--force] [-y] [--dry-run] [--current-config] - [--save-wordcloud FILE] [--load-wordcloud FILE] [--no-deps | --force-deps | --retry-deps | --ignore-failed-deps | --install-all-deps] [-a] [--version] +usage: bbot [-h] [--help-all] [-t TARGET [TARGET ...]] [-w WHITELIST [WHITELIST ...]] [-b BLACKLIST [BLACKLIST ...]] [--strict-scope] [-n SCAN_NAME] [-m MODULE [MODULE ...]] [-l] + [-em MODULE [MODULE ...]] [-f FLAG [FLAG ...]] [-rf FLAG [FLAG ...]] [-ef FLAG [FLAG ...]] [-om MODULE [MODULE ...]] [-o DIR] [-c [CONFIG ...]] [--allow-deadly] [-v] [-d] [-s] + [--force] [-y] [--dry-run] [--current-config] [--save-wordcloud FILE] [--load-wordcloud FILE] + [--no-deps | --force-deps | --retry-deps | --ignore-failed-deps | --install-all-deps] [-a] [--version] Bighuge BLS OSINT Tool @@ -146,18 +162,18 @@ options: -n SCAN_NAME, --name SCAN_NAME Name of scan (default: random) -m MODULE [MODULE ...], --modules MODULE [MODULE ...] - Modules to enable. Choices: affiliates,anubisdb,asn,aspnet_viewstate,azure_tenant,bevigil,binaryedge,bucket_aws,bucket_azure,bucket_gcp,builtwith,bypass403,c99,censys,certspotter,cookie_brute,crobat,crt,dnscommonsrv,dnsdumpster,dnszonetransfer,emailformat,ffuf,ffuf_shortnames,fullhunt,generic_ssrf,getparam_brute,github,gowitness,hackertarget,header_brute,host_header,httpx,hunt,hunterio,iis_shortnames,ipneighbor,leakix,massdns,naabu,ntlm,nuclei,otx,passivetotal,pgp,rapiddns,riddler,securitytrails,shodan_dns,skymem,smuggler,sslcert,sublist3r,telerik,threatminer,url_manipulation,urlscan,vhost,viewdns,virustotal,wappalyzer,wayback,zoomeye + Modules to enable. Choices: affiliates,anubisdb,asn,azure_tenant,badsecrets,bevigil,binaryedge,bucket_aws,bucket_azure,bucket_digitalocean,bucket_gcp,builtwith,bypass403,c99,censys,certspotter,crobat,crt,dnscommonsrv,dnsdumpster,dnszonetransfer,emailformat,ffuf,ffuf_shortnames,fingerprintx,fullhunt,generic_ssrf,github,gowitness,hackertarget,host_header,httpx,hunt,hunterio,iis_shortnames,ipneighbor,ipstack,leakix,masscan,massdns,naabu,ntlm,nuclei,otx,paramminer_cookies,paramminer_getparams,paramminer_headers,passivetotal,pgp,rapiddns,riddler,robots,securitytrails,shodan_dns,skymem,smuggler,sslcert,subdomain_hijack,sublist3r,telerik,threatminer,url_manipulation,urlscan,vhost,viewdns,virustotal,wafw00f,wappalyzer,wayback,zoomeye -l, --list-modules List available modules. -em MODULE [MODULE ...], --exclude-modules MODULE [MODULE ...] Exclude these modules. -f FLAG [FLAG ...], --flags FLAG [FLAG ...] - Enable modules by flag. Choices: active,affiliates,aggressive,brute-force,cloud-enum,deadly,email-enum,iis-shortnames,passive,portscan,report,safe,slow,subdomain-enum,web-advanced,web-basic,web-paramminer,web-screenshots + Enable modules by flag. 
Choices: active,affiliates,aggressive,cloud-enum,deadly,email-enum,iis-shortnames,passive,portscan,report,safe,service-enum,slow,subdomain-enum,subdomain-hijack,web-advanced,web-basic,web-paramminer,web-screenshots -rf FLAG [FLAG ...], --require-flags FLAG [FLAG ...] Disable modules that don't have these flags (e.g. --require-flags passive) -ef FLAG [FLAG ...], --exclude-flags FLAG [FLAG ...] Disable modules with these flags. (e.g. --exclude-flags brute-force) -om MODULE [MODULE ...], --output-modules MODULE [MODULE ...] - Output module(s). Choices: asset_inventory,csv,http,human,json,neo4j,python,websocket + Output module(s). Choices: asset_inventory,csv,http,human,json,neo4j,python,web_report,websocket -o DIR, --output-dir DIR -c [CONFIG ...], --config [CONFIG ...] custom config file, or configuration options in key=value format: 'modules.shodan.api_key=1234' @@ -233,204 +249,221 @@ For explanations of config options, see `defaults.yml` or the [wiki](https://git ### Note: You can find more fun and interesting modules at the [Module Playground](https://github.com/blacklanternsecurity/bbot-module-playground). For instructions on how to install these other modules, see the [wiki](https://github.com/blacklanternsecurity/bbot/wiki#module-playground). -To view a full list of module config options, use `--help-all`. +To see modules' options (how to change wordlists, thread count, etc.), use `--help-all`. ~~~ -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| Module | Type | Needs | Description | Flags | Produced Events | -| | | API | | | | -| | | Key | | | | -+==================+==========+=========+==========================================+=========================================+==========================================+ -| aspnet_viewstate | scan | | Parse web pages for viewstates and check | active,safe,web-basic | VULNERABILITY | -| | | | them against blacklist3r | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| bucket_aws | scan | | Check for S3 buckets related to target | active,cloud-enum,safe | STORAGE_BUCKET | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| bucket_azure | scan | | Check for Azure storage blobs related to | active,cloud-enum,safe | STORAGE_BUCKET | -| | | | target | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| bucket_gcp | scan | | Check for Google object storage related | active,cloud-enum,safe | STORAGE_BUCKET | -| | | | to target | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| bypass403 | scan | | Check 403 pages for common bypasses | active,aggressive,web-advanced | FINDING | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| cookie_brute | scan | | Check for common HTTP cookie parameters | 
active,aggressive,brute-force,slow,web- | FINDING | -| | | | | paramminer | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| dnszonetransfer | scan | | Attempt DNS zone transfers | active,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| ffuf | scan | | A fast web fuzzer written in Go | active,aggressive,brute- | URL | -| | | | | force,deadly,web-advanced | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| ffuf_shortnames | scan | | Use ffuf in combination IIS shortnames | active,aggressive,brute-force,iis- | URL | -| | | | | shortnames,web-advanced | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| generic_ssrf | scan | | Check for generic SSRFs | active,aggressive,web-advanced | VULNERABILITY | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| getparam_brute | scan | | Check for common HTTP GET parameters | active,aggressive,brute-force,slow,web- | FINDING | -| | | | | paramminer | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| gowitness | scan | | Take screenshots of webpages | active,safe,web-screenshots | SCREENSHOT | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| header_brute | scan | | Check for common HTTP header parameters | active,aggressive,brute-force,slow,web- | FINDING | -| | | | | paramminer | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| host_header | scan | | Try common HTTP Host header spoofing | active,aggressive,web-advanced | FINDING | -| | | | techniques | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| httpx | scan | | Visit webpages. 
Many other modules rely | active,safe,web-basic | HTTP_RESPONSE,URL | -| | | | on httpx | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| hunt | scan | | Watch for commonly-exploitable HTTP | active,safe,web-advanced | FINDING | -| | | | parameters | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| iis_shortnames | scan | | Check for IIS shortname vulnerability | active,iis-shortnames,safe,web-basic | URL_HINT | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| naabu | scan | | Execute port scans with naabu | active,aggressive,portscan | OPEN_TCP_PORT | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| ntlm | scan | | Watch for HTTP endpoints that support | active,safe,web-basic | DNS_NAME,FINDING | -| | | | NTLM authentication | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| nuclei | scan | | Fast and customisable vulnerability | active,aggressive,deadly,web-advanced | FINDING,VULNERABILITY | -| | | | scanner | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| smuggler | scan | | Check for HTTP smuggling | active,aggressive,brute-force,slow,web- | FINDING | -| | | | | advanced | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| sslcert | scan | | Visit open ports and retrieve SSL | active,affiliates,email- | DNS_NAME,EMAIL_ADDRESS | -| | | | certificates | enum,safe,subdomain-enum | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| telerik | scan | | Scan for critical Telerik | active,aggressive,slow,web-basic | FINDING,VULNERABILITY | -| | | | vulnerabilities | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| url_manipulation | scan | | Attempt to identify URL parsing/routing | active,aggressive,web-advanced | FINDING | -| | | | based vulnerabilities | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| vhost | scan | | Fuzz for virtual hosts | active,aggressive,brute- | DNS_NAME,VHOST | -| | | | | force,deadly,slow,web-advanced | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| wappalyzer | scan | | Extract technologies from web responses | active,safe,web-basic | TECHNOLOGY | 
-+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| affiliates | scan | | Summarize affiliate domains at the end | passive,report,safe | | -| | | | of a scan | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| anubisdb | scan | | Query jldc.me's database for subdomains | passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| asn | scan | | Query ripe and bgpview.io for ASNs | passive,report,safe,subdomain-enum | ASN | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| azure_tenant | scan | | Query Azure for tenant sister domains | affiliates,passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| bevigil | scan | X | Retrieve OSINT data from mobile | passive,safe,subdomain-enum | DNS_NAME,URL_UNVERIFIED | -| | | | applications using BeVigil | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| binaryedge | scan | X | Query the BinaryEdge API | passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| builtwith | scan | X | Query Builtwith.com for subdomains | affiliates,passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| c99 | scan | X | Query the C99 API for subdomains | passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| censys | scan | X | Query the Censys API | email-enum,passive,safe,subdomain-enum | DNS_NAME,EMAIL_ADDRESS,IP_ADDRESS,OPEN_P | -| | | | | | ORT,PROTOCOL | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| certspotter | scan | | Query Certspotter's API for subdomains | passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| crobat | scan | | Query Project Crobat for subdomains | passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| crt | scan | | Query crt.sh (certificate transparency) | passive,safe,subdomain-enum | DNS_NAME | -| | | | for subdomains | | | 
-+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| dnscommonsrv | scan | | Check for common SRV records | passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| dnsdumpster | scan | | Query dnsdumpster for subdomains | passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| emailformat | scan | | Query email-format.com for email | email-enum,passive,safe | EMAIL_ADDRESS | -| | | | addresses | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| fullhunt | scan | X | Query the fullhunt.io API for subdomains | passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| github | scan | X | Query Github's API for related | passive,safe,subdomain-enum | URL_UNVERIFIED | -| | | | repositories | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| hackertarget | scan | | Query the hackertarget.com API for | passive,safe,subdomain-enum | DNS_NAME | -| | | | subdomains | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| hunterio | scan | X | Query hunter.io for emails | email-enum,passive,safe,subdomain-enum | DNS_NAME,EMAIL_ADDRESS,URL_UNVERIFIED | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| ipneighbor | scan | | Look beside IPs in their surrounding | aggressive,passive,subdomain-enum | IP_ADDRESS | -| | | | subnet | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| leakix | scan | | Query leakix.net for subdomains | passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| massdns | scan | | Brute-force subdomains with massdns | aggressive,brute- | DNS_NAME | -| | | | (highly effective) | force,passive,slow,subdomain-enum | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| otx | scan | | Query otx.alienvault.com for subdomains | passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| passivetotal | scan | X | Query the PassiveTotal API for | passive,safe,subdomain-enum | DNS_NAME | -| | | | subdomains | | | 
-+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| pgp | scan | | Query common PGP servers for email | email-enum,passive,safe | EMAIL_ADDRESS | -| | | | addresses | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| rapiddns | scan | | Query rapiddns.io for subdomains | passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| riddler | scan | | Query riddler.io for subdomains | passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| securitytrails | scan | X | Query the SecurityTrails API for | passive,safe,subdomain-enum | DNS_NAME | -| | | | subdomains | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| shodan_dns | scan | X | Query Shodan for subdomains | passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| skymem | scan | | Query skymem.info for email addresses | email-enum,passive,safe | EMAIL_ADDRESS | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| sublist3r | scan | | Query sublist3r's API for subdomains | passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| threatminer | scan | | Query threatminer's API for subdomains | passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| urlscan | scan | | Query urlscan.io for subdomains | passive,safe,subdomain-enum | DNS_NAME,URL_UNVERIFIED | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| viewdns | scan | | Query viewdns.info's reverse whois for | affiliates,passive,safe | DNS_NAME | -| | | | related domains | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| virustotal | scan | X | Query VirusTotal's API for subdomains | passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| wayback | scan | | Query archive.org's API for subdomains | passive,safe,subdomain-enum | DNS_NAME,URL_UNVERIFIED | 
-+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| zoomeye | scan | X | Query ZoomEye's API for subdomains | affiliates,passive,safe,subdomain-enum | DNS_NAME | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| asset_inventory | output | | Output to an asset inventory style | | | -| | | | flattened CSV file | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| csv | output | | Output to CSV | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| http | output | | Output to HTTP | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| human | output | | Output to text | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| json | output | | Output to JSON | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| neo4j | output | | Output to Neo4j | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| python | output | | Output via Python API | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| websocket | output | | Output to websockets | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| aggregate | internal | | Report on scan statistics | passive,safe | SUMMARY | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| excavate | internal | | Passively extract juicy tidbits from | passive | URL_UNVERIFIED | -| | | | scan data | | | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ -| speculate | internal | | Derive certain event types from others | passive | DNS_NAME,FINDING,IP_ADDRESS,OPEN_TCP_POR | -| | | | by common sense | | T | -+------------------+----------+---------+------------------------------------------+-----------------------------------------+------------------------------------------+ ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| Module | Type | Needs | Description | Flags | Produced Events | +| | | API | | | | +| | | Key | | | | 
++======================+==========+=========+==========================================+========================================+==========================================+ +| badsecrets | scan | | Library for detecting known or weak | active,safe,web-basic | FINDING,VULNERABILITY | +| | | | secrets across many web frameworks | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| bucket_aws | scan | | Check for S3 buckets related to target | active,cloud-enum,safe,web-basic | FINDING,STORAGE_BUCKET | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| bucket_azure | scan | | Check for Azure storage blobs related to | active,cloud-enum,safe,web-basic | FINDING,STORAGE_BUCKET | +| | | | target | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| bucket_digitalocean | scan | | Check for DigitalOcean spaces related to | active,cloud-enum,safe,web-basic | FINDING,STORAGE_BUCKET | +| | | | target | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| bucket_gcp | scan | | Check for Google object storage related | active,cloud-enum,safe,web-basic | FINDING,STORAGE_BUCKET | +| | | | to target | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| bypass403 | scan | | Check 403 pages for common bypasses | active,aggressive | FINDING | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| dnszonetransfer | scan | | Attempt DNS zone transfers | active,safe,subdomain-enum | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| ffuf | scan | | A fast web fuzzer written in Go | active,aggressive,deadly,web-advanced | URL_UNVERIFIED | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| ffuf_shortnames | scan | | Use ffuf in combination IIS shortnames | active,aggressive,iis-shortnames | URL_UNVERIFIED | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| fingerprintx | scan | | Fingerprint exposed services like RDP, | active,safe,service-enum,slow | PROTOCOL | +| | | | SSH, MySQL, etc. 
| | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| generic_ssrf | scan | | Check for generic SSRFs | active,aggressive | VULNERABILITY | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| gowitness | scan | | Take screenshots of webpages | active,safe,web-screenshots | SCREENSHOT | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| host_header | scan | | Try common HTTP Host header spoofing | active,aggressive | FINDING | +| | | | techniques | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| httpx | scan | | Visit webpages. Many other modules rely | active,safe,subdomain-enum,web-basic | HTTP_RESPONSE,URL | +| | | | on httpx | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| hunt | scan | | Watch for commonly-exploitable HTTP | active,safe,web-basic | FINDING | +| | | | parameters | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| iis_shortnames | scan | | Check for IIS shortname vulnerability | active,iis-shortnames,safe,web-basic | URL_HINT | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| masscan | scan | | Port scan IP subnets with masscan | active,aggressive,portscan | OPEN_TCP_PORT | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| naabu | scan | | Execute port scans with naabu | active,aggressive,portscan | OPEN_TCP_PORT | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| ntlm | scan | | Watch for HTTP endpoints that support | active,safe,web-basic | DNS_NAME,FINDING | +| | | | NTLM authentication | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| nuclei | scan | | Fast and customisable vulnerability | active,aggressive,deadly,web-advanced | FINDING,VULNERABILITY | +| | | | scanner | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| paramminer_cookies | scan | | Check for common HTTP cookie parameters | active,aggressive,slow,web-paramminer | FINDING | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| paramminer_getparams | scan | | Use smart brute-force to check for | active,aggressive,slow,web-paramminer | FINDING | +| | | | common HTTP GET parameters | | | 
++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| paramminer_headers | scan | | Use smart brute-force to check for | active,aggressive,slow,web-paramminer | FINDING | +| | | | common HTTP header parameters | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| robots | scan | | Look for and parse robots.txt | active,safe,web-basic | URL_UNVERIFIED | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| smuggler | scan | | Check for HTTP smuggling | active,aggressive,slow | FINDING | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| sslcert | scan | | Visit open ports and retrieve SSL | active,affiliates,email- | DNS_NAME,EMAIL_ADDRESS | +| | | | certificates | enum,safe,subdomain-enum,web-basic | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| subdomain_hijack | scan | | Detect hijackable subdomains | active,cloud-enum,safe,subdomain- | FINDING | +| | | | | enum,subdomain-hijack,web-basic | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| telerik | scan | | Scan for critical Telerik | active,aggressive,slow | FINDING,VULNERABILITY | +| | | | vulnerabilities | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| url_manipulation | scan | | Attempt to identify URL parsing/routing | active,aggressive | FINDING | +| | | | based vulnerabilities | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| vhost | scan | | Fuzz for virtual hosts | active,aggressive,deadly,slow,web- | DNS_NAME,VHOST | +| | | | | advanced | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| wafw00f | scan | | Web Application Firewall Fingerprinting | active,aggressive | WAF | +| | | | Tool | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| wappalyzer | scan | | Extract technologies from web responses | active,safe,web-basic | TECHNOLOGY | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| affiliates | scan | | Summarize affiliate domains at the end | passive,report,safe | | +| | | | of a scan | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| anubisdb | scan | | Query jldc.me's database for subdomains | 
passive,safe,subdomain-enum | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| asn | scan | | Query ripe and bgpview.io for ASNs | passive,report,safe,subdomain-enum | ASN | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| azure_tenant | scan | | Query Azure for tenant sister domains | affiliates,passive,safe,subdomain-enum | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| bevigil | scan | X | Retrieve OSINT data from mobile | passive,safe,subdomain-enum | DNS_NAME,URL_UNVERIFIED | +| | | | applications using BeVigil | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| binaryedge | scan | X | Query the BinaryEdge API | passive,safe,subdomain-enum | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| builtwith | scan | X | Query Builtwith.com for subdomains | affiliates,passive,safe,subdomain-enum | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| c99 | scan | X | Query the C99 API for subdomains | passive,safe,subdomain-enum | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| censys | scan | X | Query the Censys API | email-enum,passive,safe,subdomain-enum | DNS_NAME,EMAIL_ADDRESS,IP_ADDRESS,OPEN_P | +| | | | | | ORT,PROTOCOL | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| certspotter | scan | | Query Certspotter's API for subdomains | passive,safe,subdomain-enum | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| crobat | scan | | Query Project Crobat for subdomains | passive,safe | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| crt | scan | | Query crt.sh (certificate transparency) | passive,safe,subdomain-enum | DNS_NAME | +| | | | for subdomains | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| dnscommonsrv | scan | | Check for common SRV records | passive,safe,subdomain-enum | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| dnsdumpster | scan | | Query dnsdumpster for subdomains | passive,safe,subdomain-enum | DNS_NAME | 
++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| emailformat | scan | | Query email-format.com for email | email-enum,passive,safe | EMAIL_ADDRESS | +| | | | addresses | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| fullhunt | scan | X | Query the fullhunt.io API for subdomains | passive,safe,subdomain-enum | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| github | scan | X | Query Github's API for related | passive,safe,subdomain-enum | URL_UNVERIFIED | +| | | | repositories | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| hackertarget | scan | | Query the hackertarget.com API for | passive,safe,subdomain-enum | DNS_NAME | +| | | | subdomains | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| hunterio | scan | X | Query hunter.io for emails | email-enum,passive,safe,subdomain-enum | DNS_NAME,EMAIL_ADDRESS,URL_UNVERIFIED | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| ipneighbor | scan | | Look beside IPs in their surrounding | aggressive,passive,subdomain-enum | IP_ADDRESS | +| | | | subnet | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| ipstack | scan | X | Query IPStack's API for GeoIP | passive,safe | GEOLOCATION | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| leakix | scan | | Query leakix.net for subdomains | passive,safe,subdomain-enum | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| massdns | scan | | Brute-force subdomains with massdns | aggressive,passive,slow,subdomain-enum | DNS_NAME | +| | | | (highly effective) | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| otx | scan | | Query otx.alienvault.com for subdomains | passive,safe,subdomain-enum | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| passivetotal | scan | X | Query the PassiveTotal API for | passive,safe,subdomain-enum | DNS_NAME | +| | | | subdomains | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| pgp | scan | | Query common PGP servers for email | email-enum,passive,safe | EMAIL_ADDRESS | +| | | | addresses | | | 
++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| rapiddns | scan | | Query rapiddns.io for subdomains | passive,safe,subdomain-enum | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| riddler | scan | | Query riddler.io for subdomains | passive,safe,subdomain-enum | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| securitytrails | scan | X | Query the SecurityTrails API for | passive,safe,subdomain-enum | DNS_NAME | +| | | | subdomains | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| shodan_dns | scan | X | Query Shodan for subdomains | passive,safe,subdomain-enum | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| skymem | scan | | Query skymem.info for email addresses | email-enum,passive,safe | EMAIL_ADDRESS | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| sublist3r | scan | | Query sublist3r's API for subdomains | passive,safe | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| threatminer | scan | | Query threatminer's API for subdomains | passive,safe,subdomain-enum | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| urlscan | scan | | Query urlscan.io for subdomains | passive,safe,subdomain-enum | DNS_NAME,URL_UNVERIFIED | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| viewdns | scan | | Query viewdns.info's reverse whois for | affiliates,passive,safe | DNS_NAME | +| | | | related domains | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| virustotal | scan | X | Query VirusTotal's API for subdomains | passive,safe,subdomain-enum | DNS_NAME | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| wayback | scan | | Query archive.org's API for subdomains | passive,safe,subdomain-enum | DNS_NAME,URL_UNVERIFIED | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| zoomeye | scan | X | Query ZoomEye's API for subdomains | affiliates,passive,safe,subdomain-enum | DNS_NAME | 
++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| asset_inventory | output | | Output to an asset inventory style | | | +| | | | flattened CSV file | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| csv | output | | Output to CSV | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| http | output | | Send every event to a custom URL via a | | | +| | | | web request | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| human | output | | Output to text | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| json | output | | Output to JSON | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| neo4j | output | | Output to Neo4j | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| python | output | | Output via Python API | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| web_report | output | | Create a markdown report with web assets | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| websocket | output | | Output to websockets | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| aggregate | internal | | Report on scan statistics | passive,safe | SUMMARY | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| excavate | internal | | Passively extract juicy tidbits from | passive | URL_UNVERIFIED | +| | | | scan data | | | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ +| speculate | internal | | Derive certain event types from others | passive | DNS_NAME,FINDING,IP_ADDRESS,OPEN_TCP_POR | +| | | | by common sense | | T | ++----------------------+----------+---------+------------------------------------------+----------------------------------------+------------------------------------------+ ~~~ # Credit -BBOT is written by @TheTechromancer. Web hacking in BBOT is made possible by @liquidsec, who wrote most of the web-oriented modules and helpers. +BBOT is written by @TheTechromancer. Web hacking in BBOT is made possible by @liquidsec, who wrote most of the web modules and helpers. 
Very special thanks to the following people who made BBOT possible: - @kerrymilan for his Neo4j and Ansible expertise -- Steve Micallef (@smicallef) for creating Spiderfoot, by which BBOT is heavily inspired +- Steve Micallef (@smicallef) for creating Spiderfoot - Aleksei Kornev (@alekseiko) for allowing us ownership of the bbot Pypi repository <3 diff --git a/bbot/agent/agent.py b/bbot/agent/agent.py index ada7e2916c..b377054fbf 100644 --- a/bbot/agent/agent.py +++ b/bbot/agent/agent.py @@ -60,7 +60,7 @@ def send(self, message): break except Exception as e: log.warning(f"Error sending message: {e}, retrying") - log.debug(traceback.format_exc()) + log.trace(traceback.format_exc()) sleep(1) continue @@ -143,7 +143,7 @@ def stop_scan(self): return {"success": msg, "scan_id": scan_id} except Exception as e: log.warning(f"Error while stopping scan: {e}") - log.debug(traceback.format_exc()) + log.trace(traceback.format_exc()) finally: self.scan = None self.thread = None @@ -171,15 +171,15 @@ def err_handle(callback, *args, **kwargs): except Exception as e: msg = f"Error in {callback.__qualname__}(): {e}" log.error(msg) - log.debug(traceback.format_exc()) + log.trace(traceback.format_exc()) return {"error": msg} def _start_scan(self, scan): try: - scan.start() + scan.start_without_generator() except bbot.core.errors.ScanError as e: log.error(f"Scan error: {e}") - log.debug(traceback.format_exc()) + log.trace(traceback.format_exc()) except Exception: log.critical(f"Encountered error: {traceback.format_exc()}") self.on_scan_status("FAILED", scan.id) diff --git a/bbot/cli.py b/bbot/cli.py index 58bcb30633..9c6b1c490e 100755 --- a/bbot/cli.py +++ b/bbot/cli.py @@ -3,6 +3,7 @@ import os import sys import logging +import threading import traceback from omegaconf import OmegaConf from contextlib import suppress @@ -11,7 +12,7 @@ sys.stdout.reconfigure(line_buffering=True) # logging -from bbot.core.logger import init_logging, get_log_level +from bbot.core.logger import init_logging, get_log_level, toggle_log_level logging_queue, logging_handlers = init_logging() @@ -20,6 +21,7 @@ from bbot.modules import module_loader from bbot.core.configurator.args import parser from bbot.core.helpers.logger import log_to_stderr +from bbot.core.configurator import ensure_config_files log = logging.getLogger("bbot.cli") sys.stdout.reconfigure(line_buffering=True) @@ -32,12 +34,12 @@ def main(): - err = False scan_name = "" - try: + ensure_config_files() + try: if len(sys.argv) == 1: parser.print_help() sys.exit(1) @@ -94,8 +96,20 @@ def main(): log.verbose(f'Enabling {m} because it has flag "{f}"') modules.add(m) + default_output_modules = ["human", "json", "csv"] + + # Make a list of the modules which can be output to the console + consoleable_output_modules = [ + k for k, v in module_loader.preloaded(type="output").items() if "console" in v["config"] + ] + + # If no options are specified, use the default set if not options.output_modules: - options.output_modules = ["human"] + options.output_modules = default_output_modules + + # if none of the output modules provided on the command line are consoleable, don't turn off the defaults. Instead, just add the one specified to the defaults. 
+    elif not any(o in consoleable_output_modules for o in options.output_modules):
+        options.output_modules += default_output_modules
 
     scanner = Scanner(
         *options.targets,
@@ -166,20 +180,23 @@ def main():
                     log.verbose(
                         f"Removing {m} because it does not have the required flags: {'+'.join(options.require_flags)}"
                     )
-                    modules.remove(m)
+                    with suppress(KeyError):
+                        modules.remove(m)
 
             # excluded flags
             for m in scanner._scan_modules:
                 flags = module_loader._preloaded.get(m, {}).get("flags", [])
                 if any(f in flags for f in options.exclude_flags):
                     log.verbose(f"Removing {m} because of excluded flag: {','.join(options.exclude_flags)}")
-                    modules.remove(m)
+                    with suppress(KeyError):
+                        modules.remove(m)
 
             # excluded modules
             for m in options.exclude_modules:
                 if m in modules:
                     log.verbose(f"Removing {m} because it is excluded")
-                    modules.remove(m)
+                    with suppress(KeyError):
+                        modules.remove(m)
 
             scanner._scan_modules = list(modules)
             log_fn = log.info
@@ -210,15 +227,41 @@ def main():
                 return
 
             module_list = module_loader.filter_modules(modules=modules)
-            deadly_modules = [
-                m[0] for m in module_list if "deadly" in m[-1]["flags"] and m[0] in scanner._scan_modules
-            ]
-            if scanner._scan_modules and deadly_modules:
-                if not options.allow_deadly:
+            deadly_modules = []
+            active_modules = []
+            active_aggressive_modules = []
+            slow_modules = []
+            for m in module_list:
+                if m[0] in scanner._scan_modules:
+                    if "deadly" in m[-1]["flags"]:
+                        deadly_modules.append(m[0])
+                    if "active" in m[-1]["flags"]:
+                        active_modules.append(m[0])
+                    if "aggressive" in m[-1]["flags"]:
+                        active_aggressive_modules.append(m[0])
+                    if "slow" in m[-1]["flags"]:
+                        slow_modules.append(m[0])
+            if scanner._scan_modules:
+                if deadly_modules and not options.allow_deadly:
                     log.hugewarning(f"You enabled the following deadly modules: {','.join(deadly_modules)}")
                     log.hugewarning(f"Deadly modules are highly intrusive")
                     log.hugewarning(f"Please specify --allow-deadly to continue")
                     return
+            if active_modules:
+                if active_aggressive_modules:
+                    log.hugewarning(
+                        "This is an (aggressive) active scan! Intrusive connections will be made to target"
+                    )
+                else:
+                    log.warning(
+                        "This is a (safe) active scan. Non-intrusive connections will be made to target"
+                    )
+            else:
+                log.hugeinfo("This is a passive scan. No connections will be made to target")
+            if slow_modules:
+                log.hugewarning(
+                    f"You have enabled the following slow modules: {','.join(slow_modules)}. Scan may take longer than usual"
+                )
 
             scanner.helpers.word_cloud.load(options.load_wordcloud)
 
@@ -229,6 +272,15 @@ def main():
 
             log.hugesuccess(f"Scan ready. Press enter to execute {scanner.name}")
            input()
 
+            def keyboard_listen():
+                while True:
+                    keyboard_input = input()
+                    if not keyboard_input:
+                        toggle_log_level(logger=log)
+
+            keyboard_listen_thread = threading.Thread(target=keyboard_listen, daemon=True)
+            keyboard_listen_thread.start()
+
             scanner.start_without_generator()
 
     except bbot.core.errors.ScanError as e:
@@ -258,12 +310,12 @@ def main():
    finally:
        # save word cloud
-        with suppress(Exception):
+        with suppress(BaseException):
            save_success, filename = scanner.helpers.word_cloud.save(options.save_wordcloud)
            if save_success:
                log_to_stderr(f"Saved word cloud ({len(scanner.helpers.word_cloud):,} words) to {filename}")
 
        # remove output directory if empty
-        with suppress(Exception):
+        with suppress(BaseException):
            scanner.home.rmdir()
    if err:
        os._exit(1)
diff --git a/bbot/core/configurator/__init__.py b/bbot/core/configurator/__init__.py
index 9ca76cca1e..d29d335794 100644
--- a/bbot/core/configurator/__init__.py
+++ b/bbot/core/configurator/__init__.py
@@ -11,16 +11,18 @@
 # cached sudo password
 bbot_sudo_pass = None
 
+modules_config = OmegaConf.create(
+    {
+        "modules": module_loader.configs(type="scan"),
+        "output_modules": module_loader.configs(type="output"),
+        "internal_modules": module_loader.configs(type="internal"),
+    }
+)
+
 try:
     config = OmegaConf.merge(
         # first, pull module defaults
-        OmegaConf.create(
-            {
-                "modules": module_loader.configs(type="scan"),
-                "output_modules": module_loader.configs(type="output"),
-                "internal_modules": module_loader.configs(type="internal"),
-            }
-        ),
+        modules_config,
         # then look in .yaml files
         files.get_config(),
         # finally, pull from CLI arguments
@@ -33,25 +35,39 @@
 
 config = environ.prepare_environment(config)
 
-# ensure bbot.yml
-if not files.config_filename.exists():
-    log_to_stderr(f"Creating BBOT config at {files.config_filename}")
-    no_secrets_config = OmegaConf.to_object(config)
-    no_secrets_config = clean_dict(no_secrets_config, "api_key", "username", "password", "token", fuzzy=True)
-    yaml = OmegaConf.to_yaml(no_secrets_config)
-    yaml = "\n".join(f"# {line}" for line in yaml.splitlines())
-    with open(str(files.config_filename), "w") as f:
-        f.write(yaml)
-
-# ensure secrets.yml
-if not files.secrets_filename.exists():
-    log_to_stderr(f"Creating BBOT secrets at {files.secrets_filename}")
-    secrets_only_config = OmegaConf.to_object(config)
-    secrets_only_config = filter_dict(
-        secrets_only_config, "api_key", "username", "password", "token", "secret", "_id", fuzzy=True
-    )
-    yaml = OmegaConf.to_yaml(secrets_only_config)
-    yaml = "\n".join(f"# {line}" for line in yaml.splitlines())
-    with open(str(files.secrets_filename), "w") as f:
-        f.write(yaml)
-    files.secrets_filename.chmod(0o600)
+def ensure_config_files():
+    default_config = OmegaConf.merge(files.default_config, modules_config)
+
+    secrets_strings = ["api_key", "username", "password", "token", "secret", "_id"]
+    exclude_keys = ["modules", "output_modules", "internal_modules"]
+
+    # ensure bbot.yml
+    if not files.config_filename.exists():
+        log_to_stderr(f"Creating BBOT config at {files.config_filename}")
+        no_secrets_config = OmegaConf.to_object(default_config)
+        no_secrets_config = clean_dict(
+            no_secrets_config,
+            *secrets_strings,
+            fuzzy=True,
+            exclude_keys=exclude_keys,
+        )
+        yaml = OmegaConf.to_yaml(no_secrets_config)
+        yaml = "\n".join(f"# {line}" for line in yaml.splitlines())
+        with open(str(files.config_filename), "w") as f:
+            f.write(yaml)
+
+    # ensure secrets.yml
+    if not files.secrets_filename.exists():
+        log_to_stderr(f"Creating 
BBOT secrets at {files.secrets_filename}") + secrets_only_config = OmegaConf.to_object(default_config) + secrets_only_config = filter_dict( + secrets_only_config, + *secrets_strings, + fuzzy=True, + exclude_keys=exclude_keys, + ) + yaml = OmegaConf.to_yaml(secrets_only_config) + yaml = "\n".join(f"# {line}" for line in yaml.splitlines()) + with open(str(files.secrets_filename), "w") as f: + f.write(yaml) + files.secrets_filename.chmod(0o600) diff --git a/bbot/core/configurator/args.py b/bbot/core/configurator/args.py index dc21e57f19..c068dff9c8 100644 --- a/bbot/core/configurator/args.py +++ b/bbot/core/configurator/args.py @@ -59,30 +59,27 @@ def error(self, message): epilog = """EXAMPLES - list modules: - bbot -l - - subdomain enumeration: - bbot -t evilcorp.com -f subdomain-enum -m httpx + Subdomains: + bbot -t evilcorp.com -f subdomain-enum - passive modules only: - bbot -t evilcorp.com -f passive + Subdomains (passive only): + bbot -t evilcorp.com -f subdomain-enum -rf passive - subdomains + web screenshots: - bbot -t targets.txt -f subdomain-enum -m httpx gowitness --name my_scan --output-dir . + Subdomains + port scan + web screenshots: + bbot -t evilcorp.com -f subdomain-enum -m naabu gowitness -n my_scan -o . - subdomains + basic web scanning: + Subdomains + basic web scan (wappalyzer, robots.txt, iis shortnames, etc.): bbot -t evilcorp.com -f subdomain-enum web-basic - single module: - bbot -t evilcorp.com -m github -c modules.github.api_key=deadbeef - - web spider + advanced web scan: - bbot -t www.evilcorp.com -m httpx -f web-basic web-advanced -c web_spider_distance=2 web_spider_depth=2 + Subdomains + web spider (search for emails, etc.): + bbot -t evilcorp.com -f subdomain-enum -c web_spider_distance=2 web_spider_depth=2 - subdomains + emails + cloud buckets + portscan + screenshots + nuclei: + Subdomains + emails + cloud + port scan + non-intrusive web + web screenshots + nuclei: bbot -t evilcorp.com -f subdomain-enum email-enum cloud-enum web-basic -m naabu gowitness nuclei --allow-deadly + List modules: + bbot -l + """ @@ -148,7 +145,7 @@ def error(self, message): "-om", "--output-modules", nargs="+", - default=["human"], + default=["human", "json", "csv"], help=f'Output module(s). Choices: {",".join(output_module_choices)}', metavar="MODULE", ) diff --git a/bbot/core/configurator/environ.py b/bbot/core/configurator/environ.py index 79d99bea7d..70e8520f92 100644 --- a/bbot/core/configurator/environ.py +++ b/bbot/core/configurator/environ.py @@ -5,6 +5,7 @@ from . 
import args from ...modules import module_loader +from ..helpers.misc import cpu_architecture, os_platform, os_platform_friendly def flatten_config(config, base="bbot"): @@ -52,6 +53,11 @@ def prepare_environment(bbot_config): bbot_lib = home / "lib" os.environ["BBOT_LIB"] = str(bbot_lib) + # platform variables + os.environ["BBOT_OS_PLATFORM"] = os_platform() + os.environ["BBOT_OS"] = os_platform_friendly() + os.environ["BBOT_CPU_ARCH"] = cpu_architecture() + # exchange certain options between CLI args and config if args.cli_options is not None: # deps diff --git a/bbot/core/configurator/files.py b/bbot/core/configurator/files.py index 6a8442ac64..1d96f34ad4 100644 --- a/bbot/core/configurator/files.py +++ b/bbot/core/configurator/files.py @@ -10,6 +10,7 @@ mkdir(config_dir) config_filename = (config_dir / "bbot.yml").resolve() secrets_filename = (config_dir / "secrets.yml").resolve() +default_config = None def _get_config(filename, name="config", notify=True): @@ -26,9 +27,10 @@ def _get_config(filename, name="config", notify=True): def get_config(): - + global default_config + default_config = _get_config(defaults_filename, name="defaults") return OmegaConf.merge( - _get_config(defaults_filename, name="defaults"), + default_config, _get_config(config_filename, name="config"), _get_config(secrets_filename, name="secrets"), ) diff --git a/bbot/core/errors.py b/bbot/core/errors.py index 4b16b7ac15..fd6fa5ebb0 100644 --- a/bbot/core/errors.py +++ b/bbot/core/errors.py @@ -51,3 +51,7 @@ class WordlistError(BBOTError): class DNSError(BBOTError): pass + + +class CurlError(BBOTError): + pass diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index f4c5dd1c83..358decd784 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1,6 +1,7 @@ import json import logging import ipaddress +import traceback from typing import Optional from datetime import datetime from contextlib import suppress @@ -22,6 +23,8 @@ smart_decode, get_file_extension, validators, + smart_decode_punycode, + tagify, ) @@ -29,11 +32,12 @@ class BaseEvent: - + # Always emit this event type even if it's not in scope + _always_emit = False + # Always emit events with these tags even if they're not in scope + _always_emit_tags = ["affiliate"] # Exclude from output modules _omit = False - # Priority, 1-5, lower numbers == higher priority - _priority = 3 # Disables certain data validations _dummy = False # Data validation, if data is a dictionary @@ -53,12 +57,13 @@ def __init__( _dummy=False, _internal=None, ): - self._id = None self._hash = None self.__host = None self._port = None self.__words = None + self._priority = None + self._module_priority = None self._resolved_hosts = set() self._made_internal = False @@ -67,12 +72,12 @@ def __init__( self.timestamp = datetime.utcnow() - if tags is None: - tags = set() + self._tags = set() + if tags is not None: + self._tags = set(tagify(s) for s in tags) self._data = None self.type = event_type - self.tags = set(tags) self.confidence = int(confidence) # for creating one-off events without enforcing source requirement @@ -102,9 +107,7 @@ def __init__( try: self.data = self._sanitize_data(data) except Exception as e: - import traceback - - log.debug(traceback.format_exc()) + log.trace(traceback.format_exc()) raise ValidationError(f'Error sanitizing event data "{data}" for type "{self.type}": {e}') if not self.data: @@ -165,6 +168,13 @@ def host(self): @property def port(self): self.host + if getattr(self, "parsed", None): + if self.parsed.port is not None: + return 
self.parsed.port + elif self.parsed.scheme == "https": + return 443 + elif self.parsed.scheme == "http": + return 80 return self._port @property @@ -187,6 +197,27 @@ def words(self): def _words(self): return set() + @property + def tags(self): + return self._tags + + @tags.setter + def tags(self, tags): + if isinstance(tags, str): + tags = (tags,) + self._tags = set(tagify(s) for s in tags) + + def add_tag(self, tag): + self._tags.add(tagify(tag)) + + def remove_tag(self, tag): + with suppress(KeyError): + self._tags.remove(tagify(tag)) + + @property + def always_emit(self): + return self._always_emit or any(t in self.tags for t in self._always_emit_tags) + @property def id(self): if self._id is None: @@ -210,8 +241,8 @@ def scope_distance(self, scope_distance): self._scope_distance = new_scope_distance for t in list(self.tags): if t.startswith("distance-"): - self.tags.remove(t) - self.tags.add(f"distance-{new_scope_distance}") + self.remove_tag(t) + self.add_tag(f"distance-{new_scope_distance}") @property def source(self): @@ -248,7 +279,7 @@ def get_source(self): def make_internal(self): if not self._made_internal: self._internal = True - self.tags.add("internal") + self.add_tag("internal") self._made_internal = True def unmake_internal(self, set_scope_distance=None, force_output=False): @@ -257,7 +288,7 @@ def unmake_internal(self, set_scope_distance=None, force_output=False): if set_scope_distance is not None: self.scope_distance = set_scope_distance self._internal = False - self.tags.remove("internal") + self.remove_tag("internal") if force_output: self._force_output = True self._made_internal = False @@ -280,7 +311,7 @@ def make_in_scope(self, set_scope_distance=0): source_trail = self.unmake_internal(set_scope_distance=set_scope_distance, force_output=True) self.scope_distance = set_scope_distance if set_scope_distance == 0: - self.tags.add("in-scope") + self.add_tag("in-scope") return source_trail def _host(self): @@ -292,6 +323,7 @@ def _sanitize_data(self, data): if not isinstance(data, dict): raise ValidationError(f"data is not of type dict: {data}") data = self._data_validator(**data).dict() + data = {k: v for k, v in data.items() if v is not None} return self.sanitize_data(data) def sanitize_data(self, data): @@ -324,18 +356,25 @@ def _data_id(self): return self.data @property - def data_graph(self): + def pretty_string(self): """ Graph representation of event.data """ - return self._data_graph() + return self._pretty_string() - def _data_graph(self): + def _pretty_string(self): if isinstance(self.data, dict): with suppress(Exception): return json.dumps(self.data, sort_keys=True) return smart_decode(self.data) + @property + def data_graph(self): + """ + Representation of event.data for neo4j graph nodes + """ + return self.pretty_string + @property def data_json(self): """ @@ -405,12 +444,26 @@ def json(self, mode="json"): def from_json(j): return event_from_json(j) + @property + def module_priority(self): + if self._module_priority is None: + module = getattr(self, "module", None) + self._module_priority = int(max(1, min(5, getattr(module, "priority", 3)))) + return self._module_priority + + @module_priority.setter + def module_priority(self, priority): + self._module_priority = int(max(1, min(5, priority))) + @property def priority(self): - self_priority = int(max(1, min(5, self._priority))) - mod_priority = int(max(1, min(5, getattr(self.module, "priority", 1)))) - timestamp = self.timestamp.timestamp() - return self_priority + mod_priority + (1 / timestamp) + if 
self._priority is None: + timestamp = self.timestamp.timestamp() + if self.source.timestamp == self.timestamp: + self._priority = (timestamp,) + else: + self._priority = getattr(self.source, "priority", ()) + (timestamp,) + return self._priority def __iter__(self): """ @@ -422,13 +475,13 @@ def __lt__(self, other): """ For queue sorting """ - return self.priority < int(getattr(other, "priority", 5)) + return self.priority < getattr(other, "priority", (0,)) def __gt__(self, other): """ For queue sorting """ - return self.priority > int(getattr(other, "priority", 5)) + return self.priority > getattr(other, "priority", (0,)) def __eq__(self, other): try: @@ -450,12 +503,28 @@ def __repr__(self): return str(self) +class FINISHED(BaseEvent): + """ + Special signal event to indicate end of scan + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._priority = (999999999999999999999,) + + class DefaultEvent(BaseEvent): def sanitize_data(self, data): return data class DictEvent(BaseEvent): + def sanitize_data(self, data): + url = data.get("url", "") + if url: + self.parsed = validators.validate_url_parsed(url) + return data + def _data_human(self): return json.dumps(self.data, sort_keys=True) @@ -479,7 +548,7 @@ def _host(self): self.parsed = validators.validate_url_parsed(self.data["url"]) return make_ip_type(self.parsed.hostname) - def _data_graph(self): + def _pretty_string(self): return self.data["url"] @@ -487,9 +556,9 @@ class IP_ADDRESS(BaseEvent): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) ip = ipaddress.ip_address(self.data) - self.tags.add(f"ipv{ip.version}") + self.add_tag(f"ipv{ip.version}") if ip.is_private: - self.tags.add("private") + self.add_tag("private") self.dns_resolve_distance = getattr(self.source, "dns_resolve_distance", 0) def sanitize_data(self, data): @@ -513,14 +582,14 @@ def __init__(self, *args, **kwargs): self.dns_resolve_distance = getattr(source, "dns_resolve_distance", 0) if source_module_type == "DNS": self.dns_resolve_distance += 1 - # self.tags.add(f"resolve-distance-{self.dns_resolve_distance}") + # self.add_tag(f"resolve-distance-{self.dns_resolve_distance}") class IP_RANGE(DnsEvent): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) net = ipaddress.ip_network(self.data, strict=False) - self.tags.add(f"ipv{net.version}") + self.add_tag(f"ipv{net.version}") def sanitize_data(self, data): return str(ipaddress.ip_network(str(data), strict=False)) @@ -530,14 +599,12 @@ def _host(self): class DNS_NAME(DnsEvent): - _priority = 2 - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) if is_subdomain(self.data): - self.tags.add("subdomain") + self.add_tag("subdomain") elif is_domain(self.data): - self.tags.add("domain") + self.add_tag("domain") def sanitize_data(self, data): return validators.validate_host(data) @@ -579,9 +646,9 @@ def sanitize_data(self, data): # tag as dir or endpoint if str(self.parsed.path).endswith("/"): - self.tags.add("dir") + self.add_tag("dir") else: - self.tags.add("endpoint") + self.add_tag("endpoint") parsed_path_lower = str(self.parsed.path).lower() @@ -594,11 +661,11 @@ def sanitize_data(self, data): extension = get_file_extension(parsed_path_lower) if extension: - self.tags.add(f"extension-{extension}") + self.add_tag(f"extension-{extension}") if extension in url_extension_blacklist: - self.tags.add("blacklisted") + self.add_tag("blacklisted") if extension in url_extension_httpx_only: - self.tags.add("httpx-only") + 
self.add_tag("httpx-only") self._omit = True data = self.parsed.geturl() @@ -617,15 +684,6 @@ def _words(self): def _host(self): return make_ip_type(self.parsed.hostname) - @property - def port(self): - if self.parsed.port is not None: - return self.parsed.port - elif self.parsed.scheme == "https": - return 443 - elif self.parsed.scheme == "http": - return 80 - class URL(URL_UNVERIFIED): def sanitize_data(self, data): @@ -639,11 +697,13 @@ def sanitize_data(self, data): def resolved_hosts(self): return [i.split("-")[1] for i in self.tags if i.startswith("ip-")] + @property + def pretty_string(self): + return self.data -class STORAGE_BUCKET(URL_UNVERIFIED, DictEvent): - def sanitize_data(self, data): - self.parsed = validators.validate_url_parsed(data["url"]) - return data + +class STORAGE_BUCKET(DictEvent, URL_UNVERIFIED): + _always_emit = True class _data_validator(BaseModel): name: str @@ -671,8 +731,6 @@ def _words(self): class HTTP_RESPONSE(URL_UNVERIFIED, DictEvent): - _priority = 2 - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.web_spider_distance = getattr(self.source, "web_spider_distance", 0) @@ -699,13 +757,15 @@ def sanitize_data(self, data): def _words(self): return set() + def _pretty_string(self): + return f'{self.data["hash"]["header_mmh3"]}:{self.data["hash"]["body_mmh3"]}' + class VULNERABILITY(DictHostEvent): - _priority = 1 + _always_emit = True - def _sanitize_data(self, data): - data = super()._sanitize_data(data) - self.tags.add(data["severity"].lower()) + def sanitize_data(self, data): + self.add_tag(data["severity"].lower()) return data class _data_validator(BaseModel): @@ -716,12 +776,12 @@ class _data_validator(BaseModel): _validate_host = validator("host", allow_reuse=True)(validators.validate_host) _validate_severity = validator("severity", allow_reuse=True)(validators.validate_severity) - def _data_graph(self): + def _pretty_string(self): return f'[{self.data["severity"]}] {self.data["description"]}' class FINDING(DictHostEvent): - _priority = 1 + _always_emit = True class _data_validator(BaseModel): host: str @@ -729,20 +789,22 @@ class _data_validator(BaseModel): url: Optional[str] _validate_host = validator("host", allow_reuse=True)(validators.validate_host) - def _data_graph(self): + def _pretty_string(self): return self.data["description"] class TECHNOLOGY(DictHostEvent): - _priority = 2 - class _data_validator(BaseModel): host: str technology: str url: Optional[str] _validate_host = validator("host", allow_reuse=True)(validators.validate_host) - def _data_graph(self): + def _data_id(self): + tech = self.data.get("technology", "") + return f"{self.host}:{self.port}:{tech}" + + def _pretty_string(self): return self.data["technology"] @@ -753,7 +815,7 @@ class _data_validator(BaseModel): url: Optional[str] _validate_host = validator("host", allow_reuse=True)(validators.validate_host) - def _data_graph(self): + def _pretty_string(self): return self.data["vhost"] @@ -761,13 +823,21 @@ class PROTOCOL(DictHostEvent): class _data_validator(BaseModel): host: str protocol: str - _validate_host = validator("host", allow_reuse=True)(validators.validate_open_port) + port: Optional[int] + banner: Optional[str] + _validate_host = validator("host", allow_reuse=True)(validators.validate_host) + _validate_port = validator("port", allow_reuse=True)(validators.validate_port) - def _host(self): - host, self._port = split_host_port(self.data["host"]) - return host + def sanitize_data(self, data): + new_data = dict(data) + new_data["protocol"] = 
data.get("protocol", "").upper() + return new_data + + @property + def port(self): + return self.data.get("port", None) - def _data_graph(self): + def _pretty_string(self): return self.data["protocol"] @@ -802,11 +872,11 @@ def make_event( return data else: if event_type is None: + if isinstance(data, str): + data = smart_decode_punycode(data) event_type = get_event_type(data) if not dummy: log.debug(f'Autodetected event type "{event_type}" based on data: "{data}"') - if event_type is None: - raise ValidationError(f'Unable to autodetect event type from "{data}"') event_type = str(event_type).strip().upper() @@ -820,6 +890,7 @@ def make_event( try: data = validators.validate_host(data) except Exception as e: + log.trace(traceback.format_exc()) raise ValidationError(f'Error sanitizing event data "{data}" for type "{event_type}": {e}') data_is_ip = is_ip(data) if event_type == "DNS_NAME" and data_is_ip: diff --git a/bbot/core/event/helpers.py b/bbot/core/event/helpers.py index 872283038e..5820183ec2 100644 --- a/bbot/core/event/helpers.py +++ b/bbot/core/event/helpers.py @@ -2,7 +2,8 @@ import ipaddress from contextlib import suppress -from bbot.core.helpers import sha1, smart_decode +from bbot.core.errors import ValidationError +from bbot.core.helpers import sha1, smart_decode, smart_decode_punycode from bbot.core.helpers.regexes import event_type_regexes, event_id_regex, _hostname_regex @@ -14,7 +15,7 @@ def get_event_type(data): Attempt to divine event type from data """ - data = smart_decode(data).strip() + data = smart_decode_punycode(smart_decode(data).strip()) # IP address with suppress(Exception): @@ -38,6 +39,8 @@ def get_event_type(data): if _hostname_regex.match(data): return "DNS_NAME" + raise ValidationError(f'Unable to autodetect event type from "{data}"') + def is_event_id(s): if event_id_regex.match(str(s)): diff --git a/bbot/core/helpers/cloud/aws.py b/bbot/core/helpers/cloud/aws.py index 4f96dccd94..e88bf57a4b 100644 --- a/bbot/core/helpers/cloud/aws.py +++ b/bbot/core/helpers/cloud/aws.py @@ -2,7 +2,6 @@ class AWS(BaseCloudProvider): - domains = [ "amazon-dss.com", "amazonaws.com", diff --git a/bbot/core/helpers/cloud/azure.py b/bbot/core/helpers/cloud/azure.py index a4fd9eee2b..9f70817673 100644 --- a/bbot/core/helpers/cloud/azure.py +++ b/bbot/core/helpers/cloud/azure.py @@ -3,20 +3,22 @@ class Azure(BaseCloudProvider): domains = [ - "windows.net", - "azure.com", "azmk8s.io", "azure-api.net", "azure-mobile.net", + "azure.com", + "azure.net", "azurecontainer.io", "azurecr.io", + "azuredatalakestore.net", "azureedge.net", "azurefd.net", + "azurehdinsight.net", "azurewebsites.net", "cloudapp.net", + "windows.net", "onmicrosoft.com", "trafficmanager.net", - "vault.azure.net", "visualstudio.com", "vo.msecnd.net", ] diff --git a/bbot/core/helpers/cloud/base.py b/bbot/core/helpers/cloud/base.py index b818c8a4d4..0cce30028d 100644 --- a/bbot/core/helpers/cloud/base.py +++ b/bbot/core/helpers/cloud/base.py @@ -5,7 +5,6 @@ class BaseCloudProvider: - domains = [] regexes = {} @@ -59,8 +58,19 @@ def is_valid_bucket(self, bucket_name): return self.bucket_name_regex.match(bucket_name) def tag_event(self, event): - if event.host and isinstance(event.host, str): - for r in self.domain_regexes: - if r.match(event.host): + # tag the event if + if event.host: + # its host directly matches this cloud provider's domains + if isinstance(event.host, str) and self.domain_match(event.host): + event.tags.update(self.base_tags) + return + # or it has a CNAME that matches this cloud provider's 
domains + for rh in event.resolved_hosts: + if not self.parent_helper.is_ip(rh) and self.domain_match(rh): event.tags.update(self.base_tags) - break + + def domain_match(self, s): + for r in self.domain_regexes: + if r.match(s): + return True + return False diff --git a/bbot/core/helpers/command.py b/bbot/core/helpers/command.py index c44f62adef..c79be6e09a 100644 --- a/bbot/core/helpers/command.py +++ b/bbot/core/helpers/command.py @@ -34,12 +34,20 @@ def run_live(self, command, *args, **kwargs): if not "stderr" in kwargs: kwargs["stderr"] = subprocess.PIPE _input = kwargs.pop("input", "") + sudo = kwargs.pop("sudo", False) input_msg = "" if _input: kwargs["stdin"] = subprocess.PIPE input_msg = " (with stdin)" command = [str(s) for s in command] + env = kwargs.get("env", os.environ) + if sudo: + self.depsinstaller.ensure_root() + env["SUDO_ASKPASS"] = str((self.tools_dir / self.depsinstaller.askpass_filename).resolve()) + env["BBOT_SUDO_PASS"] = self.depsinstaller._sudo_password + kwargs["env"] = env + command = ["sudo", "-A"] + command log.hugeverbose(f"run_live{input_msg}: {' '.join(command)}") try: with catch(subprocess.Popen, command, *args, **kwargs) as process: @@ -77,8 +85,16 @@ def run(self, command, *args, **kwargs): kwargs["stderr"] = subprocess.PIPE if not "text" in kwargs: kwargs["text"] = True + sudo = kwargs.pop("sudo", False) command = [str(s) for s in command] + env = kwargs.get("env", os.environ) + if sudo: + self.depsinstaller.ensure_root() + env["SUDO_ASKPASS"] = str((self.tools_dir / self.depsinstaller.askpass_filename).resolve()) + env["BBOT_SUDO_PASS"] = self.depsinstaller._sudo_password + kwargs["env"] = env + command = ["sudo", "-A"] + command log.hugeverbose(f"run: {' '.join(command)}") result = catch(subprocess.run, command, *args, **kwargs) @@ -97,15 +113,15 @@ def catch(callback, *args, **kwargs): return callback(*args, **kwargs) except FileNotFoundError as e: log.warning(f"{e} - missing executable?") - log.debug(traceback.format_exc()) + log.trace(traceback.format_exc()) except BrokenPipeError as e: log.warning(f"Error in subprocess: {e}") - log.debug(traceback.format_exc()) + log.trace(traceback.format_exc()) def tempfile(self, content, pipe=True): """ - tempfile("temp\nfile\ncontent") --> Path("/home/user/.bbot/temp/pgxml13bov87oqrvjz7a") + tempfile(["temp", "file", "content"]) --> Path("/home/user/.bbot/temp/pgxml13bov87oqrvjz7a") if "pipe" is True (the default), a named pipe is used instead of a true file, which allows python data to be piped directly into the @@ -126,7 +142,7 @@ def tempfile(self, content, pipe=True): f.write(line) except Exception as e: log.error(f"Error creating temp file: {e}") - log.debug(traceback.format_exc()) + log.trace(traceback.format_exc()) return filename @@ -159,7 +175,7 @@ def _feed_pipe(self, pipe, content, text=True): self.scan.stop() except Exception as e: log.error(f"Error in _feed_pipe(): {e}") - log.debug(traceback.format_exc()) + log.trace(traceback.format_exc()) def feed_pipe(self, pipe, content, text=True): @@ -179,7 +195,7 @@ def tempfile_tail(self, callback): t.start() except Exception as e: log.error(f"Error setting up tail for file {filename}: {e}") - log.debug(traceback.format_exc()) + log.trace(traceback.format_exc()) return return filename @@ -192,4 +208,4 @@ def tail(filename, callback): callback(line) except Exception as e: log.error(f"Error tailing file {filename}: {e}") - log.debug(traceback.format_exc()) + log.trace(traceback.format_exc()) diff --git a/bbot/core/helpers/depsinstaller/installer.py 
b/bbot/core/helpers/depsinstaller/installer.py index fd3de34031..17f81e4b08 100644 --- a/bbot/core/helpers/depsinstaller/installer.py +++ b/bbot/core/helpers/depsinstaller/installer.py @@ -1,18 +1,20 @@ import os import sys +import stat import json import shutil import getpass import logging from time import sleep +from pathlib import Path from itertools import chain from contextlib import suppress from ansible_runner.interface import run from subprocess import CalledProcessError -from bbot.modules import module_loader -from ..misc import can_sudo_without_password from bbot.core import configurator +from bbot.modules import module_loader +from ..misc import can_sudo_without_password, os_platform log = logging.getLogger("bbot.core.helpers.depsinstaller") @@ -25,6 +27,7 @@ def __init__(self, parent_helper): http_timeout = self.parent_helper.config.get("http_timeout", 30) os.environ["ANSIBLE_TIMEOUT"] = str(http_timeout) + self.askpass_filename = "sudo_askpass.py" self._sudo_password = os.environ.get("BBOT_SUDO_PASS", None) if self._sudo_password is None: if configurator.bbot_sudo_pass is not None: @@ -167,14 +170,16 @@ def apt_install(self, packages): packages_str = ",".join(packages) log.info(f"Installing the following OS packages: {packages_str}") args = {"name": packages_str, "state": "present"} # , "update_cache": True, "cache_valid_time": 86400} - success, err = self.ansible_run( - module="package", - args=args, - ansible_args={ - "ansible_become": True, - "ansible_become_method": "sudo", - }, - ) + kwargs = {} + # don't sudo brew + if os_platform() != "darwin": + kwargs = { + "ansible_args": { + "ansible_become": True, + "ansible_become_method": "sudo", + } + } + success, err = self.ansible_run(module="package", args=args, **kwargs) if success: log.info(f'Successfully installed OS packages "{packages_str}"') else: @@ -226,6 +231,14 @@ def ansible_run(self, tasks=None, module=None, args=None, ansible_args=None): log.debug(f"ansible_run(module={module}, args={args}, ansible_args={ansible_args})") playbook = None if tasks: + for task in tasks: + if "package" in task: + # special case for macos + if os_platform() == "darwin": + # don't sudo brew + task["become"] = False + # brew doesn't support update_cache + task["package"].pop("update_cache", "") playbook = {"hosts": "all", "tasks": tasks} log.debug(json.dumps(playbook, indent=2)) if self._sudo_password is not None: @@ -289,11 +302,16 @@ def ensure_root(self, message=""): log.warning("Incorrect password") def install_core_deps(self): + to_install = set() + # install custom askpass script + askpass_src = Path(__file__).resolve().parent / self.askpass_filename + askpass_dst = self.parent_helper.tools_dir / self.askpass_filename + shutil.copy(askpass_src, askpass_dst) + askpass_dst.chmod(askpass_dst.stat().st_mode | stat.S_IEXEC) # ensure tldextract data is cached self.parent_helper.tldextract("evilcorp.co.uk") # command: package_name core_deps = {"unzip": "unzip", "curl": "curl"} - to_install = set() for command, package_name in core_deps.items(): if not self.parent_helper.which(command): to_install.add(package_name) diff --git a/bbot/core/helpers/depsinstaller/sudo_askpass.py b/bbot/core/helpers/depsinstaller/sudo_askpass.py new file mode 100644 index 0000000000..42eccc1674 --- /dev/null +++ b/bbot/core/helpers/depsinstaller/sudo_askpass.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 + +import os + +print(os.environ.get("BBOT_SUDO_PASS", ""), end="") diff --git a/bbot/core/helpers/diff.py b/bbot/core/helpers/diff.py index 
5b267ad646..43f668dfd8 100644 --- a/bbot/core/helpers/diff.py +++ b/bbot/core/helpers/diff.py @@ -10,18 +10,23 @@ class HttpCompare: - def __init__(self, baseline_url, parent_helper, method="GET", allow_redirects=False): - + def __init__(self, baseline_url, parent_helper, method="GET", allow_redirects=False, include_cache_buster=True): self.parent_helper = parent_helper self.baseline_url = baseline_url + self.include_cache_buster = include_cache_buster # vanilla URL - url_1 = self.parent_helper.add_get_params(self.baseline_url, self.gen_cache_buster()).geturl() + if self.include_cache_buster: + url_1 = self.parent_helper.add_get_params(self.baseline_url, self.gen_cache_buster()).geturl() + else: + url_1 = self.baseline_url baseline_1 = self.parent_helper.request(url_1, allow_redirects=allow_redirects, method=method) sleep(1) # put random parameters in URL, headers, and cookies - get_params = self.gen_cache_buster() - get_params.update({self.parent_helper.rand_string(6): self.parent_helper.rand_string(6)}) + get_params = {self.parent_helper.rand_string(6): self.parent_helper.rand_string(6)} + + if self.include_cache_buster: + get_params.update(self.gen_cache_buster()) url_2 = self.parent_helper.add_get_params(self.baseline_url, get_params).geturl() baseline_2 = self.parent_helper.request( url_2, @@ -70,14 +75,13 @@ def __init__(self, baseline_url, parent_helper, method="GET", allow_redirects=Fa ] dynamic_headers = self.compare_headers(baseline_1.headers, baseline_2.headers) - self.baseline_ignore_headers += dynamic_headers + self.baseline_ignore_headers += [x.lower() for x in dynamic_headers] self.baseline_body_distance = self.compare_body(baseline_1_json, baseline_2_json) def gen_cache_buster(self): return {self.parent_helper.rand_string(6): "1"} def compare_headers(self, headers_1, headers_2): - differing_headers = [] for i, headers in enumerate((headers_1, headers_2)): @@ -99,7 +103,6 @@ def compare_headers(self, headers_1, headers_2): return differing_headers def compare_body(self, content_1, content_2): - if content_1 == content_2: return True @@ -124,8 +127,11 @@ def compare( """ reflection = False - cache_key, cache_value = list(self.gen_cache_buster().items())[0] - url = self.parent_helper.add_get_params(subject, {cache_key: cache_value}).geturl() + if self.include_cache_buster: + cache_key, cache_value = list(self.gen_cache_buster().items())[0] + url = self.parent_helper.add_get_params(subject, {cache_key: cache_value}).geturl() + else: + url = subject subject_response = self.parent_helper.request( url, headers=headers, cookies=cookies, allow_redirects=allow_redirects, method=method ) diff --git a/bbot/core/helpers/dns.py b/bbot/core/helpers/dns.py index 9e81307c9d..400c870c47 100644 --- a/bbot/core/helpers/dns.py +++ b/bbot/core/helpers/dns.py @@ -2,14 +2,16 @@ import json import logging import ipaddress +import traceback +import cloudcheck import dns.resolver import dns.exception from threading import Lock from contextlib import suppress from concurrent.futures import ThreadPoolExecutor +from .threadpool import NamedLock from .regexes import dns_name_regex -from .threadpool import ThreadPoolWrapper, NamedLock from bbot.core.errors import ValidationError, DNSError from .misc import is_ip, is_domain, domain_parents, parent_domain, rand_string @@ -25,7 +27,6 @@ class DNSHelper: all_rdtypes = ["A", "AAAA", "SRV", "MX", "NS", "SOA", "CNAME", "TXT"] def __init__(self, parent_helper): - self.parent_helper = parent_helper try: self.resolver = dns.resolver.Resolver() @@ -34,7 +35,7 @@ def 
__init__(self, parent_helper): self.timeout = self.parent_helper.config.get("dns_timeout", 5) self.retries = self.parent_helper.config.get("dns_retries", 1) self.abort_threshold = self.parent_helper.config.get("dns_abort_threshold", 5) - self.dns_resolve_distance = self.parent_helper.config.get("dns_resolve_distance", 4) + self.max_dns_resolve_distance = self.parent_helper.config.get("max_dns_resolve_distance", 4) self.resolver.timeout = self.timeout self.resolver.lifetime = self.timeout self._resolver_list = None @@ -58,8 +59,7 @@ def __init__(self, parent_helper): # we need our own threadpool because using the shared one can lead to deadlocks max_workers = self.parent_helper.config.get("max_dns_threads", 100) - executor = ThreadPoolExecutor(max_workers=max_workers) - self._thread_pool = ThreadPoolWrapper(executor, max_workers=max_workers) + self._thread_pool = ThreadPoolExecutor(max_workers=max_workers) self._debug = self.parent_helper.config.get("dns_debug", False) @@ -91,7 +91,7 @@ def resolve(self, query, **kwargs): """ results = set() raw_results, errors = self.resolve_raw(query, **kwargs) - for (rdtype, answers) in raw_results: + for rdtype, answers in raw_results: for answer in answers: for _, t in self.extract_targets(answer): results.add(t) @@ -134,6 +134,12 @@ def resolve_raw(self, query, **kwargs): return (results, errors) + def submit_task(self, *args, **kwargs): + try: + return self._thread_pool.submit(*args, **kwargs) + except RuntimeError as e: + log.debug(f"Error submitting DNS thread task: {e}") + def _resolve_hostname(self, query, **kwargs): self.debug(f"Resolving {query} with kwargs={kwargs}") results = [] @@ -146,16 +152,16 @@ def _resolve_hostname(self, query, **kwargs): parent_hash = hash(f"{parent}:{rdtype}") dns_cache_hash = hash(f"{query}:{rdtype}") while tries_left > 0: - error_count = self._errors.get(parent_hash, 0) - if error_count >= self.abort_threshold: - log.verbose( - f'Aborting query "{query}" because failed {rdtype} queries for "{parent}" ({error_count:,}) exceeded abort threshold ({self.abort_threshold:,})' - ) - return results, errors try: try: results = self._dns_cache[dns_cache_hash] except KeyError: + error_count = self._errors.get(parent_hash, 0) + if error_count >= self.abort_threshold: + log.verbose( + f'Aborting query "{query}" because failed {rdtype} queries for "{parent}" ({error_count:,}) exceeded abort threshold ({self.abort_threshold:,})' + ) + return results, errors results = list(self._catch(self.resolver.resolve, query, **kwargs)) if cache_result: self._dns_cache[dns_cache_hash] = results @@ -169,10 +175,10 @@ def _resolve_hostname(self, query, **kwargs): self._errors[parent_hash] += 1 except KeyError: self._errors[parent_hash] = 1 - log.verbose( - f'DNS error or timeout for {rdtype} query "{query}" ({self._errors[parent_hash]:,} so far): {e}' - ) - errors.append(e) + log.verbose( + f'DNS error or timeout for {rdtype} query "{query}" ({self._errors[parent_hash]:,} so far): {e}' + ) + errors.append(e) # don't retry if we get a SERVFAIL if isinstance(e, dns.resolver.NoNameservers): break @@ -215,60 +221,78 @@ def _resolve_ip(self, query, **kwargs): self.debug(f"Results for {query} with kwargs={kwargs}: {results}") return results, errors - def resolve_event(self, event): - result = self._resolve_event(event) + def resolve_event(self, event, minimal=False): + result = self._resolve_event(event, minimal=minimal) # if it's a wildcard, go again with _wildcard.{domain} if len(result) == 2: event, wildcard_rdtypes = result - return 
self._resolve_event(event, wildcard_rdtypes) + return self._resolve_event(event, minimal=minimal, _wildcard_rdtypes=wildcard_rdtypes) # else we're good else: return result - def _resolve_event(self, event, _wildcard_rdtypes=None): + def _resolve_event(self, event, minimal=False, _wildcard_rdtypes=None): """ Tag event with appropriate dns record types Optionally create child events from dns resolutions """ event_tags = set() - try: - if not event.host or event.type in ("IP_RANGE",): - return [], set(), False, False, set() - children = [] - event_host = str(event.host) - - event_whitelisted = False - event_blacklisted = False - - resolved_hosts = set() - - # wildcard check first + if not event.host or event.type in ("IP_RANGE",): + return [], set(), False, False, set() + children = [] + event_host = str(event.host) + + event_whitelisted = False + event_blacklisted = False + + resolved_hosts = set() + + # wildcard checks + if not is_ip(event.host): + # check if this domain is using wildcard dns + for hostname, wildcard_domain_rdtypes in self.is_wildcard_domain(event_host).items(): + if wildcard_domain_rdtypes: + event_tags.add("wildcard-domain") + for rdtype, ips in wildcard_domain_rdtypes.items(): + event_tags.add(f"{rdtype.lower()}-wildcard-domain") + # check if the dns name itself is a wildcard entry if _wildcard_rdtypes is None: wildcard_rdtypes = self.is_wildcard(event_host) else: wildcard_rdtypes = _wildcard_rdtypes - for rdtype, (is_wildcard, wildcard_host) in wildcard_rdtypes.items(): - wildcard_tag = "error" - if is_wildcard == True: - event_tags.add("wildcard") - wildcard_tag = "wildcard" - event_tags.add(f"{rdtype.lower()}-{wildcard_tag}") - - # lock to ensure resolution of the same host doesn't start while we're working here - with self._event_cache_locks.get_lock(event_host): - # try to get data from cache - _event_tags, _event_whitelisted, _event_blacklisted, _resolved_hosts = self.event_cache_get(event_host) - event_tags.update(_event_tags) - # if we found it, return it - if _event_whitelisted is not None: - return children, event_tags, _event_whitelisted, _event_blacklisted, _resolved_hosts - - # then resolve - if event.type == "DNS_NAME": - types = "any" - else: - types = ("A", "AAAA") - resolved_raw, errors = self.resolve_raw(event_host, type=types, cache_result=True) + for rdtype, (is_wildcard, wildcard_host) in wildcard_rdtypes.items(): + wildcard_tag = "error" + if is_wildcard == True: + event_tags.add("wildcard") + wildcard_tag = "wildcard" + event_tags.add(f"{rdtype.lower()}-{wildcard_tag}") + + # lock to ensure resolution of the same host doesn't start while we're working here + with self._event_cache_locks.get_lock(event_host): + # try to get data from cache + _event_tags, _event_whitelisted, _event_blacklisted, _resolved_hosts = self.event_cache_get(event_host) + event_tags.update(_event_tags) + # if we found it, return it + if _event_whitelisted is not None: + return children, event_tags, _event_whitelisted, _event_blacklisted, _resolved_hosts + + # then resolve + if event.type == "DNS_NAME" and not minimal: + types = self.all_rdtypes + else: + types = ("A", "AAAA") + + futures = {} + for t in types: + future = self.submit_task( + self._catch_keyboardinterrupt, self.resolve_raw, event_host, type=t, cache_result=True + ) + if future is None: + break + futures[future] = t + + for future in self.parent_helper.as_completed(futures): + resolved_raw, errors = future.result() for rdtype, e in errors: event_tags.add(f"{rdtype.lower()}-error") for rdtype, records in 
resolved_raw: @@ -280,7 +304,7 @@ def _resolve_event(self, event, _wildcard_rdtypes=None): for r in records: for _, t in self.extract_targets(r): if t: - if rdtype in ("A", "AAAA"): + if rdtype in ("A", "AAAA", "CNAME"): ip = self.parent_helper.make_ip_type(t) with suppress(ValidationError): @@ -296,40 +320,51 @@ def _resolve_event(self, event, _wildcard_rdtypes=None): continue children.append((rdtype, t)) - # if the host resolves and we haven't checked wildcards yet - if children and _wildcard_rdtypes is None: - # these are the rdtypes that successfully resolve - resolved_rdtypes = set([c[0].upper() for c in children]) - # these are the rdtypes that have wildcards - wildcard_rdtypes_set = set(wildcard_rdtypes) - # consider the event a full wildcard if all its records are wildcards - event_is_wildcard = all(r in wildcard_rdtypes_set for r in resolved_rdtypes) - if event_is_wildcard and event.type in ("DNS_NAME",) and not "_wildcard" in event.data.split("."): - wildcard_parent = self.parent_helper.parent_domain(event_host) - for rdtype, (_is_wildcard, _parent_domain) in wildcard_rdtypes.items(): - if _is_wildcard: - wildcard_parent = _parent_domain - break - wildcard_data = f"_wildcard.{wildcard_parent}" - if wildcard_data != event.data: - log.debug(f'Wildcard detected, changing event.data "{event.data}" --> "{wildcard_data}"') - event.data = wildcard_data - return (event, wildcard_rdtypes) - - if "resolved" not in event_tags: - event_tags.add("unresolved") - for ip in resolved_hosts: - try: - ip = ipaddress.ip_address(ip) - if ip.is_private: - event_tags.add("private-ip") - except ValueError: - continue + # wildcard event modification (www.evilcorp.com --> _wildcard.evilcorp.com) + if not is_ip(event.host) and children and _wildcard_rdtypes is None: + # these are the rdtypes that successfully resolve + resolved_rdtypes = set([c[0].upper() for c in children]) + # these are the rdtypes that have wildcards + wildcard_rdtypes_set = set(wildcard_rdtypes) + # consider the event a full wildcard if all its records are wildcards + event_is_wildcard = all(r in wildcard_rdtypes_set for r in resolved_rdtypes) + if event_is_wildcard and event.type in ("DNS_NAME",) and not "_wildcard" in event.data.split("."): + wildcard_parent = self.parent_helper.parent_domain(event_host) + for rdtype, (_is_wildcard, _parent_domain) in wildcard_rdtypes.items(): + if _is_wildcard: + wildcard_parent = _parent_domain + break + wildcard_data = f"_wildcard.{wildcard_parent}" + if wildcard_data != event.data: + log.debug(f'Wildcard detected, changing event.data "{event.data}" --> "{wildcard_data}"') + event.data = wildcard_data + return (event, wildcard_rdtypes) + + if not self.parent_helper.in_tests: + ips = set() + if event.type == "IP_ADDRESS": + ips.add(event.data) + for rdtype, target in children: + if rdtype in ("A", "AAAA"): + ips.add(target) + for ip in ips: + provider, subnet = cloudcheck.check(ip) + if provider: + event_tags.add(f"cloud-{provider.lower()}") + + if "resolved" not in event_tags: + event_tags.add("unresolved") + for ip in resolved_hosts: + try: + ip = ipaddress.ip_address(ip) + if ip.is_private: + event_tags.add("private-ip") + except ValueError: + continue - self._event_cache[event_host] = (event_tags, event_whitelisted, event_blacklisted, resolved_hosts) - return children, event_tags, event_whitelisted, event_blacklisted, resolved_hosts - finally: - event._resolved.set() + self._event_cache[event_host] = (event_tags, event_whitelisted, event_blacklisted, resolved_hosts) + + return children, 
event_tags, event_whitelisted, event_blacklisted, resolved_hosts def event_cache_get(self, host): try: @@ -346,7 +381,9 @@ def resolve_batch(self, queries, **kwargs): """ futures = dict() for query in queries: - future = self._thread_pool.submit_task(self._catch_keyboardinterrupt, self.resolve, query, **kwargs) + future = self.submit_task(self._catch_keyboardinterrupt, self.resolve, query, **kwargs) + if future is None: + break futures[future] = query for future in self.parent_helper.as_completed(futures): query = futures[future] @@ -414,106 +451,6 @@ def get_valid_resolvers(self, min_reliability=0.99): resolver_list = self.verify_nameservers(nameservers) return resolver_list - @property - def resolvers(self): - """ - Returns set() of valid DNS servers from public-dns.info - """ - if self._resolver_list is None: - file_content = self.parent_helper.cache_get("resolver_list", cache_hrs=24 * 30) - if file_content is not None: - self._resolver_list = set([l for l in file_content.splitlines() if l]) - if not self._resolver_list: - resolvers = self.get_valid_resolvers() - if resolvers: - self._resolver_list = resolvers - self.parent_helper.cache_put("resolver_list", "\n".join(self._resolver_list)) - else: - return set() - return self._resolver_list - - @property - def mass_resolver_file(self): - self.resolvers - return self.parent_helper.cache_filename("resolver_list") - - def verify_nameservers(self, nameservers, timeout=2): - """Check each resolver to make sure it can actually resolve DNS names - - Args: - nameservers (list): nameservers to verify - timeout (int): timeout for dns query - """ - log.info(f"Verifying {len(nameservers):,} public nameservers. Please be patient, this may take a while.") - futures = [] - for nameserver in nameservers: - # don't use the system nameservers - if nameserver in self.system_resolvers: - continue - futures.append( - self._thread_pool.submit_task(self._catch_keyboardinterrupt, self.verify_nameserver, nameserver) - ) - - valid_nameservers = set() - for future in self.parent_helper.as_completed(futures): - nameserver, error = future.result() - if error is None: - self.debug(f'Nameserver "{nameserver}" is valid') - valid_nameservers.add(nameserver) - else: - self.debug(str(error)) - if not valid_nameservers: - log.hugewarning( - "Unable to reach any nameservers. Please check your internet connection and ensure that DNS is not blocked outbound." 
- ) - else: - log.info(f"Successfully verified {len(valid_nameservers):,}/{len(nameservers):,} nameservers") - - return valid_nameservers - - def verify_nameserver(self, nameserver, timeout=2): - """Validate a nameserver by making a sample query and a garbage query - - Args: - nameserver (str): nameserver to verify - timeout (int): timeout for dns query - """ - self.debug(f'Verifying nameserver "{nameserver}"') - error = None - - resolver = dns.resolver.Resolver() - resolver.timeout = timeout - resolver.lifetime = timeout - resolver.nameservers = [nameserver] - - # first, make sure it can resolve a valid hostname - try: - a_results = [str(r) for r in list(resolver.resolve("dns.google", "A"))] - aaaa_results = [str(r) for r in list(resolver.resolve("dns.google", "AAAA"))] - if not ("2001:4860:4860::8888" in aaaa_results and "8.8.8.8" in a_results): - error = f"Nameserver {nameserver} failed to resolve basic query" - except Exception: - error = f"Nameserver {nameserver} failed to resolve basic query within {timeout} seconds" - - # then, make sure it isn't feeding us garbage data - randhost = f"www-m.{rand_string(9, digits=False)}.{rand_string(10, digits=False)}.com" - if error is None: - try: - a_results = list(resolver.resolve(randhost, "A")) - error = f"Nameserver {nameserver} returned garbage data" - except dns.exception.DNSException: - pass - # Garbage query to nameserver failed successfully ;) - if error is None: - try: - a_results = list(resolver.resolve(randhost, "AAAA")) - error = f"Nameserver {nameserver} returned garbage data" - except dns.exception.DNSException: - pass - # Garbage query to nameserver failed successfully ;) - - return nameserver, error - def _catch(self, callback, *args, **kwargs): try: return callback(*args, **kwargs) @@ -528,22 +465,24 @@ def _catch(self, callback, *args, **kwargs): log.warning(f"Error in {callback.__qualname__}() with args={args}, kwargs={kwargs}") return list() - def is_wildcard(self, query, ips=None): + def is_wildcard(self, query, ips=None, rdtype=None): """ Use this method to check whether a *host* is a wildcard entry - This can reliably tell the difference between a valid DNS record and a wildcard entry in a wildcard domain. + This can reliably tell the difference between a valid DNS record and a wildcard inside a wildcard domain. If you want to know whether a domain is using wildcard DNS, use is_wildcard_domain() instead. 
Returns a dictionary in the following format: {rdtype: (is_wildcard, wildcard_parent)} - is_wildcard("www.github.io" --> {"A": (True, "github.io"), "AAAA": (True, "github.io")}) + is_wildcard("www.github.io") --> {"A": (True, "github.io"), "AAAA": (True, "github.io")} Note that is_wildcard can be True, False, or None (indicating that wildcard detection was inconclusive) """ result = {} + if rdtype is None: + rdtype = "ANY" query = self._clean_dns_record(query) # skip check if it's an IP @@ -560,107 +499,133 @@ def is_wildcard(self, query, ips=None): parent = parent_domain(query) parents = list(domain_parents(query)) - for rdtype in self.all_rdtypes: - # resolve the base query - if ips is None: - query_ips = set() - raw_results, errors = self.resolve_raw(query, type=rdtype, cache_result=True) + futures = [] + base_query_ips = dict() + # if the caller hasn't already done the work of resolving the IPs + if ips is None: + # then resolve the query for all rdtypes + for _rdtype in self.all_rdtypes: + # resolve the base query + future = self.submit_task( + self._catch_keyboardinterrupt, self.resolve_raw, query, type=_rdtype, cache_result=True + ) + if future is None: + break + futures.append(future) + + for future in self.parent_helper.as_completed(futures): + raw_results, errors = future.result() if errors and not raw_results: - self.debug(f"Failed to resolve {query} ({rdtype}) during wildcard detection") - result[rdtype] = (None, parent) + self.debug(f"Failed to resolve {query} ({_rdtype}) during wildcard detection") + result[_rdtype] = (None, parent) continue - for (rdtype, answers) in raw_results: + for _rdtype, answers in raw_results: + base_query_ips[_rdtype] = set() for answer in answers: for _, t in self.extract_targets(answer): - query_ips.add(t) - else: - query_ips = set([self._clean_dns_record(ip) for ip in ips]) + base_query_ips[_rdtype].add(t) + else: + # otherwise, we can skip all that + base_query_ips[rdtype] = set([self._clean_dns_record(ip) for ip in ips]) + if not base_query_ips: + return result + + # once we've resolved the base query and have IP addresses to work with + # we can compare the IPs to the ones we have on file for wildcards + # for every rdtype + for _rdtype in self.all_rdtypes: + # get the IPs from above + query_ips = base_query_ips.get("ANY", base_query_ips.get(_rdtype, set())) if not query_ips: continue - + # for every parent domain, starting with the longest for host in parents[::-1]: host_hash = hash(host) - # if host_hash not in self._wildcard_cache: + # make sure we've checked that domain for wildcards self.is_wildcard_domain(host) - # if we've seen this domain before if host_hash in self._wildcard_cache: + # then get its IPs from our wildcard cache wildcard_rdtypes = self._wildcard_cache[host_hash] - # otherwise check to see if the dns name matches the wildcard IPs - if rdtype in wildcard_rdtypes: - wildcard_ips = wildcard_rdtypes[rdtype] - # if the results are the same as the wildcard IPs, then ladies and gentlemen we have a wildcard - is_wildcard = all(r in wildcard_ips for r in query_ips) + # then check to see if our IPs match the wildcard ones + if _rdtype in wildcard_rdtypes: + wildcard_ips = wildcard_rdtypes[_rdtype] + # if our IPs match the wildcard ones, then ladies and gentlemen we have a wildcard + is_wildcard = any(r in wildcard_ips for r in query_ips) if is_wildcard: - result[rdtype] = (True, host) + result[_rdtype] = (True, host) break return result - def is_wildcard_domain(self, domain, retries=5): + def is_wildcard_domain(self, domain): """ 
        Check whether a domain is using wildcard DNS

        Returns a dictionary containing any DNS record types that are wildcards, and their associated IPs
            is_wildcard_domain("github.io") --> {"A": {"1.2.3.4",}, "AAAA": {"dead::beef",}}
        """
+        wildcard_domain_results = {}
        domain = self._clean_dns_record(domain)
+        # make a list of its parents
        parents = list(domain_parents(domain, include_self=True))
-        num_parents = len(parents)
-        # and check each of them, beginning with the highest parent (e.g. evilcorp.com)
+        # and check each of them, beginning with the highest parent (i.e. the root domain)
        for i, host in enumerate(parents[::-1]):
            # have we checked this host before?
            host_hash = hash(host)
            with self._wildcard_lock.get_lock(host_hash):
                # if we've seen this host before
                if host_hash in self._wildcard_cache:
-                    # return true if it's a wildcard
-                    if self._wildcard_cache[host_hash]:
-                        return self._wildcard_cache[host_hash]
-                    # return false if it's not a wildcard and it's the last one we're checking
-                    elif i + 1 == num_parents:
-                        return {}
-                    # otherwise keep going
-                    else:
-                        continue
+                    wildcard_domain_results[host] = self._wildcard_cache[host_hash]
+                    continue

+                # determine if this is a wildcard domain
                wildcard_futures = {}
                # resolve a bunch of random subdomains of the same parent
                for rdtype in self.all_rdtypes:
-                    wildcard_futures[rdtype] = []
+                    # continue if a wildcard was already found for this rdtype
+                    # if rdtype in self._wildcard_cache[host_hash]:
+                    #     continue
                    for _ in range(self.wildcard_tests):
                        rand_query = f"{rand_string(digits=False, length=10)}.{host}"
-                        future = self._thread_pool.submit_task(
-                            self._catch_keyboardinterrupt, self.resolve, rand_query, type=rdtype, retries=retries
+                        future = self.submit_task(
+                            self._catch_keyboardinterrupt,
+                            self.resolve,
+                            rand_query,
+                            type=rdtype,
+                            cache_result=False,
                        )
-                        wildcard_futures[rdtype].append(future)
+                        if future is None:
+                            break
+                        wildcard_futures[future] = rdtype

                # combine the random results
-                wildcard_rdtypes = {}
-                for rdtype, futures in wildcard_futures.items():
-                    wildcard_results = set()
-                    for future in self.parent_helper.as_completed(futures):
-                        results = future.result()
-                        if results:
-                            wildcard_results.update(results)
-                    if wildcard_results:
-                        wildcard_rdtypes[rdtype] = wildcard_results
-
-                self._wildcard_cache.update({host_hash: wildcard_rdtypes})
-                if wildcard_rdtypes:
-                    wildcard_rdtypes_str = ",".join([t.upper() for t in wildcard_rdtypes])
+                is_wildcard = False
+                wildcard_results = dict()
+                for future in self.parent_helper.as_completed(wildcard_futures):
+                    results = future.result()
+                    rdtype = wildcard_futures[future]
+                    if results:
+                        is_wildcard = True
+                    if results:
+                        if not rdtype in wildcard_results:
+                            wildcard_results[rdtype] = set()
+                        wildcard_results[rdtype].update(results)
+
+                self._wildcard_cache.update({host_hash: wildcard_results})
+                wildcard_domain_results.update({host: wildcard_results})
+                if is_wildcard:
+                    wildcard_rdtypes_str = ",".join(sorted([t.upper() for t, r in wildcard_results.items() if r]))
                    log.info(f"Encountered domain with wildcard DNS ({wildcard_rdtypes_str}): {host}")
-                    return wildcard_rdtypes
-        return {}
+
+        return wildcard_domain_results

    def _catch_keyboardinterrupt(self, callback, *args, **kwargs):
        try:
            return callback(*args, **kwargs)
        except Exception as e:
-            import traceback
-
            log.error(f"Error in {callback.__qualname__}(): {e}")
-            log.debug(traceback.format_exc())
+            log.trace(traceback.format_exc())
        except KeyboardInterrupt:
            if self.parent_helper.scan:
                self.parent_helper.scan.stop()
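The wildcard logic above boils down to: resolve a handful of random subdomains of each parent domain, cache whatever IPs come back, and treat any host whose own IPs land in that cache as a wildcard. A rough standalone sketch of the same idea using dnspython directly (the function names here are illustrative, not BBOT's API):

~~~python
import random
import string

import dns.exception
import dns.resolver


def rand_subdomain(domain, length=10):
    # e.g. "qzkfmwxhra.github.io"
    label = "".join(random.choice(string.ascii_lowercase) for _ in range(length))
    return f"{label}.{domain}"


def wildcard_ips(domain, tests=3):
    # if random subdomains resolve, the IPs they return are wildcard IPs
    ips = set()
    for _ in range(tests):
        try:
            for answer in dns.resolver.resolve(rand_subdomain(domain), "A"):
                ips.add(str(answer))
        except dns.exception.DNSException:
            continue
    return ips


# a host inside the domain is then considered a wildcard if any of its
# own A records show up in wildcard_ips(domain)
~~~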
diff --git a/bbot/core/helpers/helper.py b/bbot/core/helpers/helper.py
index c88057036c..949f9dfe76 100644
--- a/bbot/core/helpers/helper.py
+++ b/bbot/core/helpers/helper.py
@@ -41,8 +41,6 @@ def __init__(self, config, scan=None):
        self.mkdir(self.temp_dir)
        self.mkdir(self.tools_dir)
        self.mkdir(self.lib_dir)
-        # holds requests CachedSession() objects for duration of scan
-        self.cache_sessions = dict()

        self._futures = set()
        self._future_lock = Lock()
@@ -57,9 +55,8 @@ def __init__(self, config, scan=None):
    def interactsh(self):
        return Interactsh(self)

-    def http_compare(self, url, allow_redirects=False):
-
-        return HttpCompare(url, self, allow_redirects=allow_redirects)
+    def http_compare(self, url, allow_redirects=False, include_cache_buster=True):
+        return HttpCompare(url, self, allow_redirects=allow_redirects, include_cache_buster=include_cache_buster)

    def temp_filename(self):
        """
@@ -81,13 +78,13 @@ def scan(self):

    @property
    def in_tests(self):
-        return os.environ["BBOT_TESTING"] == "True"
+        return os.environ.get("BBOT_TESTING", "") == "True"

    @staticmethod
    def as_completed(*args, **kwargs):
        return as_completed(*args, **kwargs)

-    def _make_dummy_module(self, name, _type):
+    def _make_dummy_module(self, name, _type="scan"):
        """
        Construct a dummy module, for attachment to events
        """
@@ -119,6 +116,8 @@ def __getattribute__(self, attr):


 class DummyModule(BaseModule):
+    _priority = 4
+
    def __init__(self, *args, **kwargs):
        self._name = kwargs.pop("name")
        self._type = kwargs.pop("_type")
diff --git a/bbot/core/helpers/interactsh.py b/bbot/core/helpers/interactsh.py
index 52b1573898..5d065b118e 100644
--- a/bbot/core/helpers/interactsh.py
+++ b/bbot/core/helpers/interactsh.py
@@ -29,7 +29,6 @@ def __init__(self, parent_helper):
        self._thread = None

    def register(self, callback=None):
-
        rsa = RSA.generate(1024)

        self.public_key = rsa.publickey().exportKey()
@@ -84,7 +83,6 @@ def register(self, callback=None):
        return self.domain

    def deregister(self):
-
        if not self.server or not self.correlation_id or not self.secret:
            raise InteractshError(f"Missing required information to deregister")

@@ -99,7 +97,6 @@ def deregister(self):
            raise InteractshError(f"Failed to de-register with interactsh server {self.server}")

    def poll(self):
-
        if not self.server or not self.correlation_id or not self.secret:
            raise InteractshError(f"Missing required information to poll")

@@ -116,7 +113,6 @@ def poll(self):
        aes_key = r.json()["aes_key"]

        for data in data_list:
-
            decrypted_data = self.decrypt(aes_key, data)
            yield decrypted_data

@@ -133,7 +129,7 @@ def _poll_loop(self, callback):
                data_list = list(self.poll())
            except InteractshError as e:
                log.warning(e)
-                log.debug(traceback.format_exc())
+                log.trace(traceback.format_exc())
            if not data_list:
                sleep(10)
                continue
diff --git a/bbot/core/helpers/logger.py b/bbot/core/helpers/logger.py
index c22c5d24c2..1c7a126a10 100644
--- a/bbot/core/helpers/logger.py
+++ b/bbot/core/helpers/logger.py
@@ -2,6 +2,7 @@

 loglevel_mapping = {
     "DEBUG": "DBUG",
+    "TRACE": "TRCE",
     "VERBOSE": "VERB",
     "HUGEVERBOSE": "VERB",
     "INFO": "INFO",
@@ -15,6 +16,7 @@
 }
 color_mapping = {
     "DEBUG": 242,  # grey
+    "TRACE": 242,  # grey (same as DEBUG)
     "VERBOSE": 242,  # grey
     "INFO": 69,  # blue
     "HUGEINFO": 69,  # blue
diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py
index e577314e29..6293a47467 100644
--- a/bbot/core/helpers/misc.py
+++ b/bbot/core/helpers/misc.py
@@ -10,6 +10,7 @@
 import signal
 import string
 import logging
+import platform
 import ipaddress
 import wordninja
 import subprocess as sp
@@ -25,7 +26,8 @@
 from .url import *  # noqa F401
 from . import regexes
 from .. import errors
-from .names_generator import random_name  # noqa F401
+from .punycode import *  # noqa F401
+from .names_generator import random_name, names, adjectives  # noqa F401

 log = logging.getLogger("bbot.core.helpers.misc")

@@ -332,7 +334,9 @@ def kill_children(parent_pid=None, sig=signal.SIGTERM):
        try:
            child.send_signal(sig)
        except psutil.NoSuchProcess:
-            log.debug(f"No such PID: {parent_pid}")
+            log.debug(f"No such PID: {child.pid}")
+        except psutil.AccessDenied:
+            log.debug(f"Error killing PID: {child.pid} - access denied")


 def str_or_file(s):
@@ -364,8 +368,8 @@ def chain_lists(l, try_files=False, msg=None):
        f_path = Path(f).resolve()
        if try_files and f_path.is_file():
            if msg is not None:
-                msg = str(msg).format(filename=f_path)
-                log.info(msg)
+                new_msg = str(msg).format(filename=f_path)
+                log.info(new_msg)
            for line in str_or_file(f):
                final_list[line] = None
        else:
@@ -471,32 +475,42 @@ def search_format_dict(d, **kwargs):
    return d


-def filter_dict(d, *key_names, fuzzy=False, invert=False):
+def filter_dict(d, *key_names, fuzzy=False, invert=False, exclude_keys=None, prev_key=None):
    """
    Recursively filter a dictionary based on key names

-        filter_dict({"key1": "test", "key2": "asdf"}, key_name=key2)
+        filter_dict({"key1": "test", "key2": "asdf"}, "key2")
            --> {"key2": "asdf"}
    """
+    if exclude_keys is None:
+        exclude_keys = []
+    if isinstance(exclude_keys, str):
+        exclude_keys = [exclude_keys]
    ret = {}
    if isinstance(d, dict):
        for key in d:
            if key in key_names or (fuzzy and any(k in key for k in key_names)):
-                ret[key] = copy.deepcopy(d[key])
+                if not prev_key in exclude_keys:
+                    ret[key] = copy.deepcopy(d[key])
            elif isinstance(d[key], list) or isinstance(d[key], dict):
-                child = filter_dict(d[key], *key_names, fuzzy=fuzzy)
+                child = filter_dict(d[key], *key_names, fuzzy=fuzzy, prev_key=key, exclude_keys=exclude_keys)
                if child:
                    ret[key] = child
    return ret


-def clean_dict(d, *key_names, fuzzy=False):
+def clean_dict(d, *key_names, fuzzy=False, exclude_keys=None, prev_key=None):
+    if exclude_keys is None:
+        exclude_keys = []
+    if isinstance(exclude_keys, str):
+        exclude_keys = [exclude_keys]
    d = copy.deepcopy(d)
    if isinstance(d, dict):
        for key, val in list(d.items()):
            if key in key_names or (fuzzy and any(k in key for k in key_names)):
-                d.pop(key)
+                if prev_key not in exclude_keys:
+                    d.pop(key)
            else:
-                d[key] = clean_dict(val, *key_names, fuzzy=fuzzy)
+                d[key] = clean_dict(val, *key_names, fuzzy=fuzzy, prev_key=key, exclude_keys=exclude_keys)
    return d


@@ -755,3 +769,46 @@ def human_timedelta(d):
    if seconds:
        result.append(f"{seconds:,} second" + ("s" if seconds > 1 else ""))
    return ", ".join(result)
+
+
+def cpu_architecture():
+    """
+    Returns the CPU architecture, e.g. "amd64", "armv7", "arm64", etc.
+    """
+    uname = platform.uname()
+    arch = uname.machine.lower()
+    if arch.startswith("aarch"):
+        return "arm64"
+    elif arch == "x86_64":
+        return "amd64"
+    return arch
+
+
+def os_platform():
+    """
+    Returns the OS platform, e.g. "linux", "darwin", "windows", etc.
+ """ + return platform.system().lower() + + +def os_platform_friendly(): + """ + Returns the OS platform in a more human-friendly format, because apple is indecisive + """ + p = os_platform() + if p == "darwin": + return "macOS" + return p + + +tag_filter_regex = re.compile(r"[^a-z0-9]+") + + +def tagify(s): + """ + Sanitize a string into a tag-friendly format + + tagify("HTTP Web Title") --> "http-web-title" + """ + ret = str(s).lower() + return tag_filter_regex.sub("-", ret).strip("-") diff --git a/bbot/core/helpers/modules.py b/bbot/core/helpers/modules.py index 1df2b5c99a..ea4d3dd349 100644 --- a/bbot/core/helpers/modules.py +++ b/bbot/core/helpers/modules.py @@ -5,7 +5,7 @@ from omegaconf import OmegaConf from contextlib import suppress -from .misc import list_files, sha1, search_dict_by_key, search_format_dict, make_table +from .misc import list_files, sha1, search_dict_by_key, search_format_dict, make_table, os_platform class ModuleLoader: @@ -137,6 +137,12 @@ def preload_module(self, module_file): # ansible playbook elif any([target.id == "deps_ansible" for target in class_attr.targets]): ansible_tasks = ast.literal_eval(class_attr.value) + for task in ansible_tasks: + if not "become" in task: + task["become"] = False + # don't sudo brew + elif os_platform() == "darwin" and ("package" in task and task.get("become", False) == True): + task["become"] = False preloaded_data = { "watched_events": watched_events, "produced_events": produced_events, @@ -217,6 +223,8 @@ def recommend_dependencies(self, modules): missing_deps = {e: not self.check_dependency(e, modname, produced) for e in watched_events} if all(missing_deps.values()): for event_type in watched_events: + if event_type == "SCAN": + continue choices = produced_all.get(event_type, []) choices = set(choices) with suppress(KeyError): diff --git a/bbot/core/helpers/names_generator.py b/bbot/core/helpers/names_generator.py index c8aee35497..a87d26ecd2 100644 --- a/bbot/core/helpers/names_generator.py +++ b/bbot/core/helpers/names_generator.py @@ -3,7 +3,6 @@ adjectives = [ "abnormal", "acrophobic", - "adhesive", "adorable", "adversarial", "affectionate", @@ -21,6 +20,7 @@ "blazed", "bloodshot", "brown", + "carbonated", "cheeky", "childish", "chiseled", @@ -40,11 +40,13 @@ "cute", "dark", "dastardly", + "decrypted", "deep", "delicious", "demonic", - "depressed", "depraved", + "depressed", + "deranged", "derogatory", "despicable", "devilish", @@ -54,12 +56,15 @@ "difficult", "dilapidated", "dismal", + "distilled", "disturbed", "dramatic", "drunk", "effeminate", + "elden", "eldritch", "embarrassed", + "encrypted", "enigmatic", "enlightened", "esoteric", @@ -98,6 +103,8 @@ "hellish", "hideous", "hysterical", + "imaginary", + "immense", "immoral", "incomprehensible", "inebriated", @@ -117,9 +124,11 @@ "inventive", "irritable", "large", + "liquid", "loveable", "lovely", "malevolent", + "malfunctioning", "malicious", "manic", "masochistic", @@ -127,6 +136,7 @@ "mediocre", "melodramatic", "moist", + "molten", "monstrous", "muscular", "mushy", @@ -135,8 +145,9 @@ "nefarious", "negligent", "neurotic", - "normal", "nihilistic", + "normal", + "overattached", "overcompensating", "overmedicated", "overwhelming", @@ -159,16 +170,16 @@ "premature", "profound", "promiscuous", - "psychic", "psychedelic", + "psychic", "puffy", "pure", "queer", "questionable", "rabid", "raging", - "raving", "rambunctious", + "raving", "reckless", "ripped", "sadistic", @@ -196,17 +207,17 @@ "strained", "strenuous", "stricken", + "stubborn", "stuffed", "stumped", 
"subtle", - "suggestive", - "suicidal", "sudden", + "suggestive", "sunburned", "surreal", "suspicious", - "sycophantic", "sweet", + "sycophantic", "tense", "terrible", "terrific", @@ -226,6 +237,7 @@ "unmedicated", "unmelted", "unmitigated", + "unrelenting", "unrestrained", "unworthy", "utmost", @@ -279,7 +291,9 @@ "audrey", "austin", "baggins", + "bailey", "barbara", + "bart", "bellatrix", "benjamin", "betty", @@ -289,6 +303,7 @@ "bobby", "bombadil", "bonnie", + "bonson", "boromir", "bradley", "brandon", @@ -328,6 +343,7 @@ "daniel", "danielle", "danny", + "data", "david", "dawn", "deborah", @@ -388,6 +404,7 @@ "galadriel", "gandalf", "gary", + "geordi", "george", "gerald", "gimli", @@ -408,6 +425,7 @@ "helen", "henry", "hermione", + "homer", "howard", "irene", "isaac", @@ -424,6 +442,7 @@ "jasmine", "jason", "jean", + "jean-luc", "jeffrey", "jennifer", "jeremy", @@ -459,6 +478,7 @@ "kelly", "kenneth", "kenobi", + "kerry", "kevin", "kimberly", "kyle", @@ -479,10 +499,12 @@ "lori", "louis", "louise", + "lucius", "luis", "luke", "lupin", "madison", + "magnus", "margaret", "maria", "marie", @@ -500,14 +522,18 @@ "melvin", "merry", "michael", + "micheal", "michelle", "mildred", + "milhouse", "monica", "nancy", "natalie", "nathan", "nathaniel", "nazgul", + "ned", + "nelson", "nicholas", "nicole", "noah", @@ -542,6 +568,7 @@ "ron", "ronald", "rose", + "ross", "roy", "ruby", "russell", @@ -603,7 +630,9 @@ "wendy", "william", "willie", + "worf", "wormtongue", + "xavier", "yoda", "zachary", ] diff --git a/bbot/core/helpers/punycode.py b/bbot/core/helpers/punycode.py new file mode 100644 index 0000000000..bbebbafb87 --- /dev/null +++ b/bbot/core/helpers/punycode.py @@ -0,0 +1,27 @@ +import idna +from contextlib import suppress + + +def smart_decode_punycode(data): + """ + xn--eckwd4c7c.xn--zckzah --> ドメイン.テスト + """ + if not isinstance(data, str): + raise ValueError(f"data must be a string, not {type(data)}") + if "xn--" in data: + with suppress(UnicodeError): + parts = data.split("@") + return "@".join(idna.decode(p) for p in parts) + return data + + +def smart_encode_punycode(data): + """ + ドメイン.テスト --> xn--eckwd4c7c.xn--zckzah + """ + if not isinstance(data, str): + raise ValueError(f"data must be a string, not {type(data)}") + with suppress(UnicodeError): + parts = data.split("@") + return "@".join(idna.encode(p).decode(errors="ignore") for p in parts) + return data diff --git a/bbot/core/helpers/queueing.py b/bbot/core/helpers/queueing.py new file mode 100644 index 0000000000..3b7674544c --- /dev/null +++ b/bbot/core/helpers/queueing.py @@ -0,0 +1,102 @@ +import random +from contextlib import suppress +from queue import PriorityQueue, Empty + + +class QueuedEvent(tuple): + """ + Allows sorting of tuples in outgoing PriorityQueue + """ + + def __init__(self, item): + self.item = item + + def __gt__(self, other): + return self.event > other.event + + def __lt__(self, other): + return self.event < other.event + + @property + def event(self): + return self._get_event(self.item) + + @staticmethod + def _get_event(e): + try: + return e[0] + except Exception: + return e + + +class EventQueue(PriorityQueue): + """ + A "meta-queue" class that includes five queues, one for each priority + + Events are taken from the queues in a weighted random fashion based + on the priority of their parent module. 
+
+    This prevents complete exclusion of lower-priority events
+
+    This queue also tracks events by module and event type for stat purposes
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.event_types = dict()
+        self.modules = dict()
+        self._queues = dict()
+        self._priorities = (1, 2, 3, 4, 5)
+        self._weights = (10, 7, 5, 3, 1)
+        for priority in self._priorities:
+            q = PriorityQueue(*args, **kwargs)
+            self._queues[priority] = q
+
+    @property
+    def events(self):
+        for q in self._queues.values():
+            for e in q.queue:
+                yield e.event
+
+    def _qsize(self):
+        return sum(q._qsize() for q in self._queues.values())
+
+    def empty(self):
+        return all(q.empty() for q in self._queues.values())
+
+    def _put(self, item):
+        queued_event = QueuedEvent(item)
+        q = self._queues[queued_event.event.module_priority]
+        self._increment(self.event_types, queued_event.event.type)
+        self._increment(self.modules, str(queued_event.event.module))
+        q._put(queued_event)
+
+    def _get(self):
+        # first pick a (weighted) random queue
+        priority = self._random_priority()
+        try:
+            # and get an event from it
+            queued_event = self._queues[priority]._get()
+        # if that fails
+        except IndexError:
+            # try every queue
+            queues = [_ for _ in self._queues.values() if not _.empty()]
+            if not queues:
+                raise Empty
+            queued_event = queues[0]._get()
+        self._decrement(self.event_types, queued_event.event.type)
+        self._decrement(self.modules, str(queued_event.event.module))
+        return queued_event.item
+
+    def _random_priority(self):
+        return random.choices(self._priorities, weights=self._weights, k=1)[0]
+
+    def _increment(self, d, v):
+        try:
+            d[v] += 1
+        except KeyError:
+            d[v] = 1
+
+    def _decrement(self, d, v):
+        with suppress(KeyError):
+            d[v] = max(0, d[v] - 1)
diff --git a/bbot/core/helpers/regexes.py b/bbot/core/helpers/regexes.py
index 1f1d277b24..716374f8cb 100644
--- a/bbot/core/helpers/regexes.py
+++ b/bbot/core/helpers/regexes.py
@@ -16,15 +16,21 @@
    ]
 ]

+# Designed to remove junk such as these from the beginning of a search string:
+# \nhttps://www.google.com
+# \x3dhttps://www.google.com
+# %a2https://www.google.com
+# \uac20https://www.google.com
+junk_remover = r"(?:\\x[a-fA-F0-9]{2}|%[a-fA-F0-9]{2}|\\u[a-fA-F0-9]{4}|\\[a-zA-Z])?"
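The `junk_remover` prefix exists to consume escape-sequence debris (literal `\n`, `\x3d`, `%a2`, `\uac20`, etc.) sitting in front of an extracted value. A quick sketch of the intended effect, anchored to the start of a search string:

~~~python
import re

# the same pattern as junk_remover above, anchored to the string start
junk_re = re.compile(r"^(?:\\x[a-fA-F0-9]{2}|%[a-fA-F0-9]{2}|\\u[a-fA-F0-9]{4}|\\[a-zA-Z])?")

for s in (r"\nhttps://www.google.com", "%a2https://www.google.com", r"\uac20https://www.google.com"):
    # strip a single junk prefix, if present
    print(junk_re.sub("", s, count=1))
# each line prints: https://www.google.com
~~~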
 word_regex = re.compile(r"[^\d\W_]+")
 word_num_regex = re.compile(r"[^\W_]+")
 num_regex = re.compile(r"\d+")

 _ipv6_regex = r"[A-F0-9:]*:[A-F0-9:]*:[A-F0-9:]*"
 ipv6_regex = re.compile(_ipv6_regex, re.I)

-_dns_name_regex = r"(?:(?:[\w-]+)\.)+(?:[a-z0-9]{2,20})"
+_dns_name_regex = r"(?:(?:[\w-]+)\.)+(?:[^\W_0-9]{2,20})"
 _hostname_regex = re.compile(r"^[\w-]+$")
-_email_regex = r"(?:[a-zA-Z0-9][\w\-\.\+]{,100})@(?:[a-zA-Z0-9_][\w\-\._]{,100})\.(?:[a-zA-Z]{2,8})"
+_email_regex = r"(?:[^\W_][\w\-\.\+]{,100})@(?:\w[\w\-\._]{,100})\.(?:[^\W_0-9]{2,8})"
 email_regex = re.compile(_email_regex, re.I)

 event_type_regexes = OrderedDict(
@@ -42,14 +48,14 @@
        (
            "OPEN_TCP_PORT",
            (
-                r"^((?:[A-Z0-9_]|[A-Z0-9_][A-Z0-9\-_]*[A-Z0-9_])[\.]?)+(?:[A-Z0-9_][A-Z0-9\-_]*[A-Z0-9_]|[A-Z0-9_]):[0-9]{1,5}$",
+                r"^((?:\w|\w[\w\-]*\w)[\.]?)+(?:\w[\w\-]*\w|\w):[0-9]{1,5}$",
                r"^\[" + _ipv6_regex + r"\]:[0-9]{1,5}$",
            ),
        ),
        (
            "URL",
            (
-                r"https?://((?:[A-Z0-9_]|[A-Z0-9_][A-Z0-9\-_]*[A-Z0-9_])[\.]?)+(?:[A-Z0-9_][A-Z0-9\-_]*[A-Z0-9_]|[A-Z0-9_])(?::[0-9]{1,5})?.*$",
+                r"https?://((?:\w|\w[\w\-]*\w)[\.]?)+(?:\w[\w\-]*\w|\w)(?::[0-9]{1,5})?.*$",
                r"https?://\[" + _ipv6_regex + r"\](?::[0-9]{1,5})?.*$",
            ),
        ),
diff --git a/bbot/core/helpers/threadpool.py b/bbot/core/helpers/threadpool.py
index dba634c90a..73a62a2363 100644
--- a/bbot/core/helpers/threadpool.py
+++ b/bbot/core/helpers/threadpool.py
@@ -1,6 +1,9 @@
 import logging
 import threading
-from time import sleep
+import traceback
+from datetime import datetime
+from queue import SimpleQueue, Full
+from concurrent.futures import ThreadPoolExecutor

 log = logging.getLogger("bbot.core.helpers.threadpool")

@@ -8,44 +11,153 @@

 from ...core.errors import ScanCancelledError


+def pretty_fn(a):
+    if callable(a):
+        return a.__qualname__
+    return a
+
+
+class ThreadPoolSimpleQueue(SimpleQueue):
+    def __init__(self, *args, **kwargs):
+        self._executor = kwargs.pop("_executor", None)
+        super().__init__(*args, **kwargs)
+
+    def get(self, *args, **kwargs):
+        work_item = super().get(*args, **kwargs)
+        thread_id = threading.get_ident()
+        self._executor._current_work_items[thread_id] = (work_item, datetime.now())
+        return work_item
+
+
+class BBOTThreadPoolExecutor(ThreadPoolExecutor):
+    """
+    Allows inspection of thread pool to determine which functions are currently executing
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._current_work_items = {}
+        self._work_queue = ThreadPoolSimpleQueue(_executor=self)
+
+    @property
+    def threads_status(self):
+        work_items = []
+        for thread_id, (work_item, start_time) in sorted(self._current_work_items.items()):
+            func = work_item.fn.__qualname__
+            func_index = 0
+            if work_item and not work_item.future.done():
+                for i, f in enumerate(list(work_item.args)):
+                    if callable(f):
+                        func = f.__qualname__
+                        func_index = i + 1
+                    else:
+                        break
+            running_for = datetime.now() - start_time
+            wi_args = list(work_item.args)[func_index:]
+            wi_args = [pretty_fn(a) for a in wi_args]
+            wi_args = str(wi_args).strip("[]")
+            wi_kwargs = ", ".join(["{0}={1}".format(k, pretty_fn(v)) for k, v in work_item.kwargs.items()])
+            func_with_args = f"{func}({wi_args}" + (f", {wi_kwargs}" if wi_kwargs else "") + ")"
+            work_items.append(
+                (running_for, f"running for {int(running_for.total_seconds()):>3} seconds: {func_with_args}")
+            )
+        work_items.sort(key=lambda x: x[0])
+        return [x[-1] for x in work_items]
+
+
 class ThreadPoolWrapper:
    """
    Layers more granular control overtop of a shared thread pool

    Allows setting lower thread limits for modules, etc.
""" - def __init__(self, executor, max_workers=None): + def __init__(self, executor, max_workers=None, qsize=None): self.executor = executor self.max_workers = max_workers + self.max_qsize = qsize self.futures = set() - self._future_lock = threading.Lock() - self._submit_task_lock = threading.Lock() + try: + self.executor._thread_pool_wrappers.append(self) + except AttributeError: + self.executor._thread_pool_wrappers = [self] + + self._num_tasks = 0 + self._task_count_lock = threading.Lock() + + self._lock = threading.RLock() + self.not_full = threading.Condition(self._lock) def submit_task(self, callback, *args, **kwargs): """ A wrapper around threadpool.submit() - - This blocks, which isn't ideal, but it ensures that modules don't hog the shared thread pool """ - with self._submit_task_lock: - if self.max_workers is not None: - while self.num_tasks > self.max_workers: - sleep(0.1) + block = kwargs.get("_block", True) + force = kwargs.get("_force_submit", False) + success = False + with self.not_full: + self.num_tasks_increment() try: - future = self.executor.submit(callback, *args, **kwargs) - except RuntimeError as e: - raise ScanCancelledError(e) - with self._future_lock: - self.futures.add(future) - return future + if not force: + if not block: + if self.is_full or self.underlying_executor_is_full: + raise Full + else: + # wait until there's room + while self.is_full or self.underlying_executor_is_full: + self.not_full.wait() + + try: + # submit the job + future = self.executor.submit(self._execute_callback, callback, *args, **kwargs) + future.add_done_callback(self._on_future_done) + success = True + return future + except RuntimeError as e: + raise ScanCancelledError(e) + finally: + if not success: + self.num_tasks_decrement() + + def _execute_callback(self, callback, *args, **kwargs): + try: + return callback(*args, **kwargs) + finally: + self.num_tasks_decrement() + + def _on_future_done(self, future): + if future.cancelled(): + self.num_tasks_decrement() @property def num_tasks(self): - with self._future_lock: - for f in list(self.futures): - if f.done(): - self.futures.remove(f) - return len(self.futures) + (1 if self._submit_task_lock.locked() else 0) + with self._task_count_lock: + return self._num_tasks + + def num_tasks_increment(self): + with self._task_count_lock: + self._num_tasks += 1 + + def num_tasks_decrement(self): + with self._task_count_lock: + self._num_tasks = max(0, self._num_tasks - 1) + for wrapper in self.executor._thread_pool_wrappers: + try: + with wrapper.not_full: + wrapper.not_full.notify() + except RuntimeError: + continue + except Exception as e: + log.warning(f"Unknown error in num_tasks_decrement(): {e}") + log.trace(traceback.format_exc()) + + @property + def is_full(self): + if self.max_workers is None: + return False + return self.num_tasks > self.max_workers + + @property + def underlying_executor_is_full(self): + return self.max_qsize is not None and self.qsize >= self.max_qsize @property def qsize(self): @@ -54,21 +166,63 @@ def qsize(self): def shutdown(self, *args, **kwargs): self.executor.shutdown(*args, **kwargs) + @property + def threads_status(self): + return self.executor.threads_status + + +import time +from concurrent.futures._base import ( + FINISHED, + _AS_COMPLETED, + _AcquireFutures, + _create_and_install_waiters, + _yield_finished_futures, +) + + +def as_completed(fs, timeout=None): + """ + Copied from https://github.com/python/cpython/blob/main/Lib/concurrent/futures/_base.py + Modified to only 
+    if timeout is not None:
+        end_time = timeout + time.monotonic()
+
+    fs = set(fs)
+    total_futures = len(fs)
+    with _AcquireFutures(fs):
+        finished = set(f for f in fs if f._state == FINISHED)
+        pending = fs - finished
+        waiter = _create_and_install_waiters(fs, _AS_COMPLETED)
+    finished = list(finished)
+    try:
+        yield from _yield_finished_futures(finished, waiter, ref_collect=(fs,))
+
+        while pending:
+            if timeout is None:
+                wait_timeout = None
+            else:
+                wait_timeout = end_time - time.monotonic()
+                if wait_timeout < 0:
+                    raise TimeoutError("%d (of %d) futures unfinished" % (len(pending), total_futures))
+
+            waiter.event.wait(wait_timeout)
+
+            with waiter.lock:
+                finished = waiter.finished_futures
+                waiter.finished_futures = []
+                waiter.event.clear()
+
+            # reverse to keep finishing order
+            finished.reverse()
+            yield from _yield_finished_futures(finished, waiter, ref_collect=(fs, pending))

-def as_completed(fs):
-    fs = list(fs)
-    while fs:
-        result = False
-        for i, f in enumerate(fs):
-            if f.done():
-                result = True
-                future = fs.pop(i)
-                if future._state in ("CANCELLED", "CANCELLED_AND_NOTIFIED"):
-                    continue
-                yield future
-                break
-        if not result:
-            sleep(0.05)
+    finally:
+        # Remove waiter from unfinished futures
+        for f in fs:
+            with f._condition:
+                f._waiters.remove(waiter)


 class _Lock:
diff --git a/bbot/core/helpers/url.py b/bbot/core/helpers/url.py
index 8fc0b3c40f..a6c5a7aa7e 100644
--- a/bbot/core/helpers/url.py
+++ b/bbot/core/helpers/url.py
@@ -4,6 +4,8 @@
 from contextlib import suppress
 from urllib.parse import urlparse, parse_qs, urlencode, ParseResult

+from .punycode import smart_decode_punycode
+
 log = logging.getLogger("bbot.core.helpers.url")

@@ -76,6 +78,8 @@ def clean_url(url):
    # special case for IPv6 URLs
    if parsed.netloc.startswith("["):
        hostname = f"[{hostname}]"
+    # punycode
+    hostname = smart_decode_punycode(hostname)
    parsed = parsed._replace(netloc=hostname)
    # normalize double slashes
    parsed = parsed._replace(path=double_slash_regex.sub("/", parsed.path))
diff --git a/bbot/core/helpers/validators.py b/bbot/core/helpers/validators.py
index 04eaa653c8..587178cd66 100644
--- a/bbot/core/helpers/validators.py
+++ b/bbot/core/helpers/validators.py
@@ -3,6 +3,7 @@

 from bbot.core.helpers import regexes
 from bbot.core.helpers.url import clean_url
+from bbot.core.helpers.punycode import smart_decode_punycode
 from bbot.core.helpers.misc import split_host_port, make_netloc

 log = logging.getLogger("bbot.core.helpers.validators")

@@ -55,7 +56,7 @@ def validate_host(host):
            return str(ip)
    except Exception:
        # finally, try DNS_NAME
-        host = host.lstrip("*.")
+        host = smart_decode_punycode(host.lstrip("*."))
        for r in regexes.event_type_regexes["DNS_NAME"]:
            if r.match(host):
                return host
@@ -87,7 +88,7 @@ def validate_severity(severity):

 @validator
 def validate_email(email):
-    email = str(email).strip().lower()
+    email = smart_decode_punycode(str(email).strip().lower())
    if any(r.match(email) for r in regexes.event_type_regexes["EMAIL_ADDRESS"]):
        return email
    assert False, f'Invalid email: "{email}"'
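Since validate_host() and validate_email() now run inputs through smart_decode_punycode(), internationalized names are normalized before the regex checks. A small usage sketch with the idna package, mirroring the helper in bbot/core/helpers/punycode.py:

~~~python
import idna
from contextlib import suppress


def smart_decode_punycode(data):
    # xn--eckwd4c7c.xn--zckzah --> ドメイン.テスト
    if "xn--" in data:
        with suppress(UnicodeError):
            # split on "@" so email addresses decode too
            return "@".join(idna.decode(p) for p in data.split("@"))
    return data


print(smart_decode_punycode("bob@xn--eckwd4c7c.xn--zckzah"))  # bob@ドメイン.テスト
~~~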
WordlistError
+from bbot.core.errors import WordlistError, CurlError

 log = logging.getLogger("bbot.core.helpers.web")

@@ -87,21 +88,29 @@ def request(self, *args, **kwargs):
            raise_error (bool): Whether to raise exceptions (default: False)
        """

+        # we handle our own retries
+        retries = kwargs.pop("retries", self.config.get("http_retries", 1))
+        if getattr(self, "retry_adapter", None) is None:
+            self.retry_adapter = HTTPAdapter(max_retries=0)
+
        raise_error = kwargs.pop("raise_error", False)

        cache_for = kwargs.pop("cache_for", None)
        if cache_for is not None:
            log.debug(f"Caching HTTP session with expire_after={cache_for}")
-            try:
-                session = self.cache_sessions[cache_for]
-            except KeyError:
-                db_path = str(self.cache_dir / "requests-cache.sqlite")
-                backend = SQLiteCache(db_path=db_path)
-                session = CachedSession(expire_after=cache_for, backend=backend)
-                self.cache_sessions[cache_for] = session
-
-        if kwargs.pop("session", None) or not cache_for:
+            db_path = str(self.cache_dir / "requests-cache.sqlite")
+            backend = SQLiteCache(db_path=db_path)
+            session = CachedSession(expire_after=cache_for, backend=backend)
+            session.mount("http://", self.retry_adapter)
+            session.mount("https://", self.retry_adapter)
+        elif kwargs.get("session", None) is not None:
            session = kwargs.pop("session", None)
+            session.mount("http://", self.retry_adapter)
+            session.mount("https://", self.retry_adapter)
+        else:
+            session = requests.Session()
+            session.mount("http://", self.retry_adapter)
+            session.mount("https://", self.retry_adapter)

        http_timeout = self.config.get("http_timeout", 20)
        user_agent = self.config.get("user_agent", "BBOT")
@@ -112,7 +121,6 @@ def request(self, *args, **kwargs):
            args = []

        url = kwargs.get("url", "")
-        retries = kwargs.pop("retries", 0)

        if not args and "method" not in kwargs:
            kwargs["method"] = "GET"
@@ -126,6 +134,12 @@ def request(self, *args, **kwargs):
            headers = {}
        if "User-Agent" not in headers:
            headers.update({"User-Agent": user_agent})
+        # only add custom headers if the URL is in-scope
+        if self.scan.in_scope(url):
+            for hk, hv in self.scan.config.get("http_headers", {}).items():
+                # don't clobber headers
+                if hk not in headers:
+                    headers[hk] = hv
        kwargs["headers"] = headers

        http_debug = self.config.get("http_debug", False)
@@ -146,7 +160,7 @@ def request(self, *args, **kwargs):
            if retries != "infinite":
                retries -= 1
            if retries == "infinite" or retries >= 0:
-                log.warning(f'Error requesting "{url}" ({e}), retrying...')
+                log.verbose(f'Error requesting "{url}" ({e}), retrying...')
                sleep(1)
            else:
                if raise_error:
@@ -167,7 +181,7 @@ def api_page_iter(self, url, page_size=100, json=True, **requests_kwargs):
                import traceback

                log.warning(f'Error in api_page_iter() for url: "{new_url}"')
-                log.debug(traceback.format_exc())
+                log.trace(traceback.format_exc())
                break
            finally:
                offset += page_size

@@ -175,12 +189,10 @@ def api_page_iter(self, url, page_size=100, json=True, **requests_kwargs):

    def curl(self, *args, **kwargs):
-
        url = kwargs.get("url", "")

        if not url:
-            log.debug("No URL supplied to CURL helper")
-            return
+            raise CurlError("No URL supplied to CURL helper")

        curl_command = ["curl", url, "-s"]

@@ -206,6 +218,11 @@ def curl(self, *args, **kwargs):
        if "User-Agent" not in headers:
            headers["User-Agent"] = user_agent

+        # only add custom headers if the URL is in-scope
+        if self.scan.in_scope(url):
+            for hk, hv in self.scan.config.get("http_headers", {}).items():
+                headers[hk] = hv
+
        # add the timeout
        if not "timeout" in kwargs:
            timeout = http_timeout
@@ -214,7 +231,7 @@ def curl(self, *args, **kwargs):
            curl_command.append(str(timeout))

        for k, v in headers.items():
-            if type(v) == list:
+            if isinstance(v, list):
                for x in v:
                    curl_command.append("-H")
                    curl_command.append(f"{k}: {x}")
@@ -238,7 +255,6 @@ def curl(self, *args, **kwargs):
        cookies = kwargs.get("cookies", "")

        if cookies:
-
            curl_command.append("-b")
            cookies_str = ""
            for k, v in cookies.items():
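Both request() and curl() now consult the new `http_headers` config, but only for in-scope URLs, so credentials like session cookies aren't sent to third parties. A condensed sketch of that decision (`in_scope` here stands in for the scanner's real scope check):

~~~python
def build_headers(url, in_scope, config_headers, user_agent="BBOT"):
    headers = {"User-Agent": user_agent}
    # custom headers are only attached when the URL is in-scope
    if in_scope(url):
        for k, v in config_headers.items():
            headers.setdefault(k, v)  # don't clobber existing headers
    return headers


cookies = {"Cookie": "session=0123456789abcdef"}
print(build_headers("https://www.evilcorp.com/", lambda u: "evilcorp.com" in u, cookies))
print(build_headers("https://example.com/", lambda u: "evilcorp.com" in u, cookies))
~~~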
diff --git a/bbot/core/helpers/wordcloud.py b/bbot/core/helpers/wordcloud.py
index c79f91cb61..881ba872ad 100644
--- a/bbot/core/helpers/wordcloud.py
+++ b/bbot/core/helpers/wordcloud.py
@@ -162,7 +162,7 @@ def save(self, filename=None, limit=None):
            import traceback

            log.warning(f"Failed to save word cloud to {filename}: {e}")
-            log.debug(traceback.format_exc())
+            log.trace(traceback.format_exc())
            return False, filename

    def load(self, filename=None):
@@ -192,4 +192,4 @@ def load(self, filename=None):
                log_fn = log.warning
            log_fn(f"Failed to load word cloud from {wordcloud_path}: {e}")
            if filename is not None:
-                log.debug(traceback.format_exc())
+                log.trace(traceback.format_exc())
diff --git a/bbot/core/logger/__init__.py b/bbot/core/logger/__init__.py
index 9d82a31331..d55c5a2b0a 100644
--- a/bbot/core/logger/__init__.py
+++ b/bbot/core/logger/__init__.py
@@ -1 +1 @@
-from .logger import init_logging, get_log_level, ColoredFormatter
+from .logger import init_logging, get_log_level, ColoredFormatter, toggle_log_level
diff --git a/bbot/core/logger/logger.py b/bbot/core/logger/logger.py
index b1e86fea0e..c5668b1d89 100644
--- a/bbot/core/logger/logger.py
+++ b/bbot/core/logger/logger.py
@@ -13,6 +13,9 @@
 from ..helpers.logger import colorize, loglevel_mapping


+_log_level_override = None
+
+
 class ColoredFormatter(logging.Formatter):
    """
    Pretty colors for terminal
@@ -81,6 +84,7 @@ def logToRoot(message, *args, **kwargs):

 # custom logging levels
 addLoggingLevel("STDOUT", 100)
+addLoggingLevel("TRACE", 49)
 addLoggingLevel("HUGEWARNING", 31)
 addLoggingLevel("HUGESUCCESS", 26)
 addLoggingLevel("SUCCESS", 25)
@@ -89,6 +93,9 @@ def logToRoot(message, *args, **kwargs):
 addLoggingLevel("VERBOSE", 15)


+verbosity_levels_toggle = [logging.INFO, logging.VERBOSE, logging.DEBUG]
+
+
 def stop_listener(listener):
    with suppress(Exception):
        listener.stop()
@@ -109,7 +116,6 @@ def log_worker_setup(logging_queue):


 def log_listener_setup(logging_queue):
-
    log_dir = Path(config["home"]) / "logs"
    if not mkdir(log_dir, raise_error=False):
        error_and_exit(f"Failure creating or error writing to BBOT logs directory ({log_dir})")
@@ -130,13 +136,13 @@ def log_listener_setup(logging_queue):
        f"{log_dir}/bbot.debug.log", when="d", interval=1, backupCount=14
    )

-    log_level = get_log_level()
-
-    config_debug = config.get("debug", False)
-    config_silent = config.get("silent", False)
-
    def stderr_filter(record):
-        if record.levelno == logging.STDOUT:
+        config_silent = config.get("silent", False)
+        log_level = get_log_level()
+        excluded_levels = [logging.STDOUT]
+        if log_level > logging.DEBUG:
+            excluded_levels.append(logging.TRACE)
+        if record.levelno in excluded_levels:
            return False
        if record.levelno >= logging.ERROR:
            return True
@@ -149,7 +155,7 @@ def stderr_filter(record):
    stderr_handler.addFilter(stderr_filter)
    stdout_handler.addFilter(lambda x: x.levelno == logging.STDOUT)
    debug_handler.addFilter(lambda x: x.levelno != logging.STDOUT and x.levelno >= logging.DEBUG)
-    main_handler.addFilter(lambda x: x.levelno != logging.STDOUT and x.levelno >= logging.VERBOSE)
+    main_handler.addFilter(lambda x: x.levelno not in (logging.STDOUT, logging.TRACE) and x.levelno >= logging.VERBOSE)

    # Set log format
    debug_format = logging.Formatter("%(asctime)s [%(levelname)s] %(name)s %(filename)s:%(lineno)s %(message)s")
@@ -158,9 +164,7 @@ def stderr_filter(record):
    stderr_handler.setFormatter(ColoredFormatter("%(levelname)s %(name)s: %(message)s"))
    stdout_handler.setFormatter(logging.Formatter("%(message)s"))

-    handlers = [stdout_handler, stderr_handler, main_handler]
-    if config_debug:
-        handlers.append(debug_handler)
+    handlers = [stdout_handler, stderr_handler, main_handler, debug_handler]

    log_listener = QueueListener(logging_queue, *handlers)
    log_listener.start()
@@ -192,6 +196,9 @@ def init_logging():


 def get_log_level():
+    if _log_level_override is not None:
+        return _log_level_override
+
    from bbot.core.configurator.args import cli_options

    if config.get("debug", False) or os.environ.get("BBOT_DEBUG", "").lower() in ("true", "yes"):
@@ -204,3 +211,23 @@ def get_log_level():
        if cli_options.debug:
            loglevel = logging.DEBUG
    return loglevel
+
+
+def set_log_level(level, logger=None):
+    global _log_level_override
+    if logger is not None:
+        logger.hugeinfo(f"Setting log level to {logging.getLevelName(level)}")
+    config["silent"] = False
+    _log_level_override = level
+    log = logging.getLogger("bbot")
+    log.setLevel(level)
+
+
+def toggle_log_level(logger=None):
+    log_level = get_log_level()
+    if log_level in verbosity_levels_toggle:
+        for i, level in enumerate(verbosity_levels_toggle):
+            if log_level == level:
+                set_log_level(verbosity_levels_toggle[(i + 1) % len(verbosity_levels_toggle)], logger=logger)
+    else:
+        set_log_level(verbosity_levels_toggle[0], logger=logger)
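toggle_log_level() cycles through the three interactive verbosity levels in order, wrapping back around to INFO. The arithmetic is just modular indexing:

~~~python
import logging

logging.addLevelName(15, "VERBOSE")  # mirrors addLoggingLevel("VERBOSE", 15)
levels = [logging.INFO, 15, logging.DEBUG]  # INFO -> VERBOSE -> DEBUG -> INFO ...

level = logging.INFO
for _ in range(4):
    level = levels[(levels.index(level) + 1) % len(levels)]
    print(logging.getLevelName(level))
# VERBOSE, DEBUG, INFO, VERBOSE
~~~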
diff --git a/bbot/defaults.yml b/bbot/defaults.yml
index 350e2667b2..82f9c61b83 100644
--- a/bbot/defaults.yml
+++ b/bbot/defaults.yml
@@ -3,13 +3,19 @@
 # BBOT working directory
 home: ~/.bbot
 # Don't output events that are further than this from the main scope
-scope_report_distance: 1
+# 1 == 1 hop away from main scope
+# 0 == in scope only
+scope_report_distance: 0
 # Generate new DNS_NAME and IP_ADDRESS events through DNS resolution
 dns_resolution: true
 # Limit the number of BBOT threads
 max_threads: 25
-# Limit the number of DNS threads
+# Limit the number of DNS threads (this should be approximately 4x max_threads)
 max_dns_threads: 100
+# HTTP proxy
+http_proxy:
+# Web user-agent
+user_agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36

 ### ADVANCED OPTIONS ###

@@ -28,12 +34,17 @@ excavate: True
 # Summarize activity at the end of a scan
 aggregate: True

-# HTTP proxy
-http_proxy:
 # HTTP timeout (for Python requests; API calls, etc.)
-http_timeout: 30
+http_timeout: 10
 # HTTP timeout (for httpx)
 httpx_timeout: 5
+# Custom HTTP headers (e.g. cookies, etc.)
+# in the format { "Header-Key": "header_value" }
+# These are attached to all in-scope HTTP requests
+# Note that some modules (e.g. github) may end up sending these to out-of-scope resources
+http_headers: {}
+# HTTP retries (for Python requests; API calls, etc.)
+http_retries: 1
 # HTTP retries (for httpx)
 httpx_retries: 1
 # Enable/disable debug messages for web requests/responses
@@ -64,8 +75,6 @@ dns_debug: false
 ssl_verify: false
 # How many scan results to keep before cleaning up the older ones
 keep_scans: 20
-# Web user-agent
-user_agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36
 # Completely ignore URLs with these extensions
 url_extension_blacklist:
   # images
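The tightened `scope_report_distance: 0` default means only events at distance 0 (i.e. in-scope) are output; raising it to 1 would also report events one hop away. A simplified sketch of the gating implied by those comments (not the actual manager code):

~~~python
scope_report_distance = 0  # the new default

def should_report(event_scope_distance):
    # 0 == in scope only, 1 == one hop away from main scope, etc.
    return event_scope_distance <= scope_report_distance

assert should_report(0)      # in-scope event: reported
assert not should_report(1)  # one hop out: suppressed under the new default
~~~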
diff --git a/bbot/modules/anubisdb.py b/bbot/modules/anubisdb.py
index 43819eaf35..55bf966941 100644
--- a/bbot/modules/anubisdb.py
+++ b/bbot/modules/anubisdb.py
@@ -2,21 +2,31 @@


 class anubisdb(crobat):
-
    flags = ["subdomain-enum", "passive", "safe"]
    watched_events = ["DNS_NAME"]
    produced_events = ["DNS_NAME"]
    meta = {"description": "Query jldc.me's database for subdomains"}

    base_url = "https://jldc.me/anubis/subdomains"
+    dns_abort_depth = 5

    def request_url(self, query):
        url = f"{self.base_url}/{self.helpers.quote(query)}"
-        return self.helpers.request(url)
+        return self.request_with_fail_count(url)
+
+    def abort_if_pre(self, hostname):
+        """
+        Discards results that are longer than 5 segments, e.g. a.b.c.d.evilcorp.com
+        This exists because of the _disgusting_ amount of garbage data in this API
+        """
+        dns_depth = hostname.count(".") + 1
+        if dns_depth > self.dns_abort_depth:
+            return True
+        return False

    def abort_if(self, event):
        # abort if dns name is unresolved
-        return not "resolved" in event.tags or super().abort_if(event)
+        return (not "resolved" in event.tags) or super().abort_if(event)

    def parse_results(self, r, query):
        results = set()
@@ -24,6 +34,6 @@ def parse_results(self, r, query):
        if json:
            for hostname in json:
                hostname = str(hostname).lower()
-                if hostname.endswith(f".{query}"):
+                if hostname.endswith(f".{query}") and not self.abort_if_pre(hostname):
                    results.add(hostname)
        return results
diff --git a/bbot/modules/azure_tenant.py b/bbot/modules/azure_tenant.py
index a075679f65..fafa391a76 100644
--- a/bbot/modules/azure_tenant.py
+++ b/bbot/modules/azure_tenant.py
@@ -23,7 +23,8 @@ def handle_event(self, event):
        if domains:
            self.success(f'Found {len(domains):,} domains under tenant for "{query}"')
            for domain in domains:
-                self.emit_event(domain, "DNS_NAME", source=event, tags=["affiliate"])
+                if domain != query:
+                    self.emit_event(domain, "DNS_NAME", source=event, tags=["affiliate"])
        # todo: tenants?
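The anubisdb change is worth a second look: abort_if_pre() drops hostnames more than `dns_abort_depth` (5) labels deep before they ever become events, cheaply filtering out the API's garbage data. The check itself is just label counting:

~~~python
DNS_ABORT_DEPTH = 5  # matches anubisdb.dns_abort_depth

def too_deep(hostname, max_depth=DNS_ABORT_DEPTH):
    return hostname.count(".") + 1 > max_depth

assert too_deep("a.b.c.d.evilcorp.com")      # 6 labels -> discarded
assert not too_deep("www.api.evilcorp.com")  # 4 labels -> kept
~~~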
def query(self, domain): @@ -55,7 +56,7 @@ def query(self, domain): self.debug(f"Retrieving tenant domains at {url}") - r = self.helpers.request(url, method="POST", headers=headers, data=data) + r = self.request_with_fail_count(url, method="POST", headers=headers, data=data) status_code = getattr(r, "status_code", 0) if status_code not in (200, 421): self.warning(f'Error retrieving azure_tenant domains for "{domain}" (status code: {status_code})') diff --git a/bbot/modules/badsecrets.py b/bbot/modules/badsecrets.py index 3393e4c0db..eea9ffaee9 100644 --- a/bbot/modules/badsecrets.py +++ b/bbot/modules/badsecrets.py @@ -4,27 +4,28 @@ class badsecrets(BaseModule): - watched_events = ["HTTP_RESPONSE"] produced_events = ["FINDING", "VULNERABILITY"] - flags = ["active", "safe", "web-basic"] - meta = {"description": "Library for detecting known or weak secrets on across many platforms"} - - deps_pip = ["badsecrets"] + flags = ["active", "safe", "web-basic", "web-thorough"] + meta = {"description": "Library for detecting known or weak secrets across many web frameworks"} + max_event_handlers = 2 + deps_pip = ["badsecrets>=0.1.220"] def handle_event(self, event): resp_body = event.data.get("body", None) resp_headers = event.data.get("header", None) resp_cookies = {} - resp_cookies_raw = resp_headers.get("set_cookie", None) - if resp_cookies_raw: - if "," in resp_cookies_raw: - resp_cookies_list = resp_cookies_raw.split(",") - else: - resp_cookies_list = [resp_cookies_raw] - for c in resp_cookies_list: - c2 = c.strip().split(";")[0].split("=") - resp_cookies[c2[0]] = c2[1] + if resp_headers: + resp_cookies_raw = resp_headers.get("set_cookie", None) + if resp_cookies_raw: + if "," in resp_cookies_raw: + resp_cookies_list = resp_cookies_raw.split(",") + else: + resp_cookies_list = [resp_cookies_raw] + for c in resp_cookies_list: + c2 = c.lstrip(";").strip().split(";")[0].split("=") + if len(c2) == 2: + resp_cookies[c2[0]] = c2[1] if resp_body or resp_cookies: r_list = carve_all_modules(body=resp_body, cookies=resp_cookies) if r_list: diff --git a/bbot/modules/base.py b/bbot/modules/base.py index a2627cdf43..2ca858a4ae 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -2,18 +2,14 @@ import logging import threading import traceback -from time import sleep from sys import exc_info from contextlib import suppress from ..core.helpers.threadpool import ThreadPoolWrapper from ..core.errors import ScanCancelledError, ValidationError, WordlistError -from bbot.core.event.base import is_event - class BaseModule: - # Event types to watch watched_events = [] # Event types to produce @@ -67,13 +63,15 @@ class BaseModule: batch_size = 1 # Seconds to wait before force-submitting batch batch_wait = 10 + # Use in conjunction with .request_with_fail_count() to set_error_state() after this many failed HTTP requests + failed_request_abort_threshold = 5 # When set to false, prevents events generated by this module from being automatically marked as in-scope # Useful for low-confidence modules like speculate and ipneighbor _scope_shepherding = True # Exclude from scan statistics _stats_exclude = False - # outgoing queue size - _qsize = 100 + # outgoing queue size (None == infinite) + _qsize = None # Priority of events raised by this module, 1-5, lower numbers == higher priority _priority = 3 # Name, overridden automatically @@ -86,13 +84,11 @@ def __init__(self, scan): self.errored = False self._log = None self._incoming_event_queue = None - self._outgoing_event_queue = None - # how many seconds we've gone 
without processing a batch
-        self._batch_idle = 0
+        # seconds since we've submitted a batch
+        self._last_submitted_batch = None
        # wrapper around shared thread pool to ensure that a single module doesn't hog more than its share
-        self.thread_pool = ThreadPoolWrapper(
-            self.scan._thread_pool.executor, max_workers=self.config.get("max_threads", self.max_threads)
-        )
+        max_workers = self.config.get("max_threads", self.max_threads)
+        self.thread_pool = ThreadPoolWrapper(self.scan._thread_pool, max_workers=max_workers)
        self._internal_thread_pool = ThreadPoolWrapper(
            self.scan._internal_thread_pool.executor, max_workers=self.max_event_handlers
        )
@@ -101,6 +97,15 @@ def __init__(self, scan):
        self._cleanedup = False
        self._watched_events = None

+        self._lock = threading.RLock()
+        self.event_received = threading.Condition(self._lock)
+
+        # string constant
+        self._custom_filter_criteria_msg = "it did not meet custom filter criteria"
+
+        # track number of failures (for .request_with_fail_count())
+        self._request_failures = 0
+
    def setup(self):
        """
        Perform setup functions at the beginning of the scan.
@@ -159,6 +164,42 @@ def cleanup(self):
        """
        return

+    def require_api_key(self):
+        """
+        Use in setup() to ensure the module is configured with an API key
+        """
+        self.api_key = self.config.get("api_key", "")
+        if self.auth_secret:
+            try:
+                self.ping()
+                self.hugesuccess(f"API is ready")
+                return True
+            except Exception as e:
+                return None, f"Error with API ({str(e).strip()})"
+        else:
+            return None, "No API key set"
+
+    def ping(self):
+        """
+        Used in conjunction with require_api_key to ensure an API is up and responding
+
+        Requires the use of an assert statement.
+
+        E.g. if your API has a "/ping" endpoint, you can use it like this:
+            def ping(self):
+                r = self.request_with_fail_count(f"{self.base_url}/ping")
+                resp_content = getattr(r, "text", "")
+                assert getattr(r, "status_code", 0) == 200, resp_content
+        """
+        return
+
+    @property
+    def auth_secret(self):
+        """
+        Use this to indicate whether the module has everything it needs for authentication
+        """
+        return getattr(self, "api_key", "")
+
    def get_watched_events(self):
        """
        Override if you need your watched_events to be dynamic
@@ -173,25 +214,41 @@ def submit_task(self, *args, **kwargs):
    def catch(self, *args, **kwargs):
        return self.scan.manager.catch(*args, **kwargs)

+    def _postcheck_and_run(self, callback, event):
+        acceptable, reason = self._event_postcheck(event)
+        if not acceptable:
+            if reason:
+                self.debug(f"Not accepting {event} because {reason}")
+            return
+        return callback(event)
+
    def _handle_batch(self, force=False):
        if self.batch_size <= 1:
            return
        if self.num_queued_events > 0 and (force or self.num_queued_events >= self.batch_size):
-            self._batch_idle = 0
            on_finish_callback = None
            events, finish, report = self.events_waiting
            if finish:
                on_finish_callback = self.finish
            elif report:
                on_finish_callback = self.report
-            if events:
+            checked_events = []
+            for e in events:
+                acceptable, reason = self._event_postcheck(e)
+                if not acceptable:
+                    if reason:
+                        self.debug(f"Not accepting {e} because {reason}")
+                    continue
+                checked_events.append(e)
+            if checked_events:
                self.debug(f"Handling batch of {len(events):,} events")
-                self._internal_thread_pool.submit_task(
-                    self.catch,
-                    self.handle_batch,
-                    *events,
-                    _on_finish_callback=on_finish_callback,
-                )
+                if not self.errored:
+                    self._internal_thread_pool.submit_task(
+                        self.catch,
+                        self.handle_batch,
+                        *checked_events,
+                        _on_finish_callback=on_finish_callback,
+                    )
                return True
        return False

@@ -209,14 
+266,28 @@ def make_event(self, *args, **kwargs): return event def emit_event(self, *args, **kwargs): - if self.scan.stopping: - return event_kwargs = dict(kwargs) for o in ("on_success_callback", "abort_if", "quick"): event_kwargs.pop(o, None) event = self.make_event(*args, **event_kwargs) + if event is None: + return + # nerf event's priority if it's likely not to be in scope + if event.scope_distance > 0: + event_in_scope = self.scan.whitelisted(event) and not self.scan.blacklisted(event) + if not event_in_scope: + event.module_priority += event.scope_distance if event: - self.outgoing_event_queue.put((event, kwargs)) + # Wait for parent event to resolve (in case its scope distance changes) + while 1: + if self.scan.stopping: + return + resolved = event.source._resolved.wait(timeout=0.1) + if resolved: + # update event's scope distance based on its parent + event.scope_distance = event.source.scope_distance + 1 + break + self.scan.manager.incoming_event_queue.put((event, kwargs)) @property def events_waiting(self): @@ -231,11 +302,8 @@ def events_waiting(self): break try: event = self.incoming_event_queue.get_nowait() - if type(event) == str: - if event == "FINISHED": - finish = True - elif event == "REPORT": - report = True + if event.type == "FINISHED": + finish = True else: events.append(event) except queue.Empty: @@ -254,7 +322,6 @@ def start(self): self.thread.start() def _setup(self): - status_codes = {False: "hard-fail", None: "soft-fail", True: "success"} status = False @@ -272,7 +339,7 @@ def _setup(self): if isinstance(e, WordlistError): status = None msg = f"{e}" - self.debug(traceback.format_exc()) + self.trace() return status, str(msg) @property @@ -280,40 +347,23 @@ def _force_batch(self): """ Determine whether a batch should be forcefully submitted """ - # if we've been idle long enough - if self._batch_idle >= self.batch_wait: - return True - # if scan is finishing - if self.scan.status == "FINISHING": - return True - # if there's a batch stalemate - batch_modules = [m for m in self.scan.modules.values() if m.batch_size > 1] - if all([(not m.running) for m in batch_modules]): - return True - return False + # if we're below our maximum threading potential + return self._internal_thread_pool.num_tasks < self.max_event_handlers def _worker(self): - # keep track of how long we've been running - iterations = 0 try: while not self.scan.stopping: - iterations += 1 - # hold the reigns if our outgoing queue is full - if self.outgoing_event_queue.qsize() >= self._qsize: - self._batch_idle += 1 - sleep(0.1) + if self._qsize and self.outgoing_event_queue_qsize >= self._qsize: + with self.event_received: + self.event_received.wait(timeout=0.1) continue if self.batch_size > 1: - if iterations % 10 == 0: - self._batch_idle += 1 - force = self._force_batch - if force: - self._batch_idle = 0 - submitted = self._handle_batch(force=force) + submitted = self._handle_batch(force=self._force_batch) if not submitted: - sleep(0.1) + with self.event_received: + self.event_received.wait(timeout=0.1) else: try: @@ -326,16 +376,15 @@ def _worker(self): continue self.debug(f"Got {e} from {getattr(e, 'module', e)}") # if we receive the special "FINISHED" event - if type(e) == str: - if e == "FINISHED": - self._internal_thread_pool.submit_task(self.catch, self.finish) - elif e == "REPORT": - self._internal_thread_pool.submit_task(self.catch, self.report) + if e.type == "FINISHED": + self._internal_thread_pool.submit_task(self.catch, self.finish) else: if self._type == "output": - 
self.catch(self.handle_event, e)
+                    self.catch(self._postcheck_and_run, self.handle_event, e)
                 else:
-                    self._internal_thread_pool.submit_task(self.catch, self.handle_event, e)
+                    self._internal_thread_pool.submit_task(
+                        self.catch, self._postcheck_and_run, self.handle_event, e
+                    )
         except KeyboardInterrupt:
             self.debug(f"Interrupted")
@@ -344,13 +393,7 @@ def _worker(self):
             self.verbose(f"Scan cancelled, {e}")
         except Exception as e:
             self.set_error_state(f"Exception ({e.__class__.__name__}) in module {self.name}:\n{e}")
-            self.debug(traceback.format_exc())
-
-    def _filter_event(self, event, precheck_only=False):
-        acceptable, reason = self._event_precheck(event)
-        if acceptable and not precheck_only:
-            acceptable, reason = self._event_postcheck(event)
-        return acceptable, reason
+            self.trace()

     @property
     def max_scope_distance(self):
@@ -361,14 +404,13 @@ def max_scope_distance(self):
     def _event_precheck(self, event):
         """
         Check if an event should be accepted by the module
-        These checks are safe to run before an event has been DNS-resolved
+        Used when putting an event INTO the modules' queue
         """
-        # special "FINISHED" event
-        if type(event) == str:
-            if event in ("FINISHED", "REPORT"):
-                return True, ""
-            else:
-                return False, f'string value "{event}" is invalid'
+        # special signal event types
+        if event.type in ("FINISHED",):
+            return True, ""
+        if self.errored:
+            return False, f"module is in error state"
         # exclude non-watched types
         if not any(t in self.get_watched_events() for t in ("*", event.type)):
             return False, "its type is not in watched_events"
@@ -393,11 +435,14 @@ def _event_precheck(self, event):
     def _event_postcheck(self, event):
         """
         Check if an event should be accepted by the module
-        These checks must be run after an event has been DNS-resolved
+        Used when taking an event FROM the module's queue (immediately before it's handled)
         """
-        if type(event) == str:
+        if event.type in ("FINISHED",):
             return True, ""

+        if "active" in self.flags and "target" in event.tags and event not in self.scan.whitelist:
+            return False, "it is not in whitelist and module has active flag"
+
         if self.in_scope_only:
             if event.scope_distance > 0:
                 return False, "it did not meet in_scope_only filter criteria"
@@ -412,13 +457,18 @@ def _event_postcheck(self, event):
         # custom filtering
         try:
-            if not self.filter_event(event):
-                return False, f"{event} did not meet custom filter criteria"
+            filter_result = self.filter_event(event)
+            msg = str(self._custom_filter_criteria_msg)
+            with suppress(ValueError, TypeError):
+                filter_result, reason = filter_result
+                msg += f": {reason}"
+            if not filter_result:
+                return False, msg
+        except ScanCancelledError:
+            return False, "Scan cancelled"
         except Exception as e:
-            import traceback
-
             self.error(f"Error in filter_event({event}): {e}")
-            self.debug(traceback.format_exc())
+            self.trace()

         return True, ""
@@ -430,21 +480,26 @@ def _cleanup(self):
             self.catch(callback, _force=True)

     def queue_event(self, event):
-        if self.incoming_event_queue is not None and not self.errored:
-            acceptable, reason = self._filter_event(event)
-            if not acceptable and reason:
+        if self.incoming_event_queue in (None, False):
+            self.debug(f"Not in an acceptable state to queue event")
+            return
+        acceptable, reason = self._event_precheck(event)
+        if not acceptable:
+            if reason and reason != "its type is not in watched_events":
                 self.debug(f"Not accepting {event} because {reason}")
-                return
-            if is_event(event):
-                self.scan.stats.event_consumed(event, self)
+            return
+        self.scan.stats.event_consumed(event, self)
+        try:
             self.incoming_event_queue.put(event)
-        else:
+        except AttributeError:
             self.debug(f"Not in an acceptable state to queue event")
+        with self.event_received:
+            self.event_received.notify()

     def set_error_state(self, message=None):
-        if message is not None:
-            self.error(str(message))
         if not self.errored:
+            if message is not None:
+                self.warning(str(message))
             self.debug(f"Setting error state for module {self.name}")
             self.errored = True
             # clear incoming queue
@@ -471,24 +526,34 @@ def status(self):
         internal_pool = self._internal_thread_pool.num_tasks
         pool_total = main_pool + internal_pool
         incoming_qsize = 0
-        outgoing_qsize = 0
         if self.incoming_event_queue:
             incoming_qsize = self.incoming_event_queue.qsize()
-        if self.outgoing_event_queue:
-            outgoing_qsize = self.outgoing_event_queue.qsize()
         status = {
-            "events": {"incoming": incoming_qsize, "outgoing": outgoing_qsize},
+            "events": {"incoming": incoming_qsize, "outgoing": self.outgoing_event_queue_qsize},
             "tasks": {"main_pool": main_pool, "internal_pool": internal_pool, "total": pool_total},
             "errored": self.errored,
         }
         status["running"] = self._is_running(status)
         return status

+    def request_with_fail_count(self, *args, **kwargs):
+        r = self.helpers.request(*args, **kwargs)
+        if r is None:
+            self._request_failures += 1
+        else:
+            self._request_failures = 0
+        if self._request_failures >= self.failed_request_abort_threshold:
+            self.set_error_state(f"Setting error state due to {self._request_failures:,} failed HTTP requests")
+        return r
+
     @staticmethod
     def _is_running(module_status):
         for pool, count in module_status["tasks"].items():
             if count > 0:
                 return True
+        for direction, qsize in module_status["events"].items():
+            if qsize > 0:
+                return True
         return False

     @property
@@ -508,14 +573,12 @@ def config(self):
     @property
     def incoming_event_queue(self):
         if self._incoming_event_queue is None:
-            self._incoming_event_queue = queue.SimpleQueue()
+            self._incoming_event_queue = queue.PriorityQueue()
         return self._incoming_event_queue

     @property
-    def outgoing_event_queue(self):
-        if self._outgoing_event_queue is None:
-            self._outgoing_event_queue = queue.SimpleQueue()
-        return self._outgoing_event_queue
+    def outgoing_event_queue_qsize(self):
+        return self.scan.manager.incoming_event_queue.modules.get(str(self), 0)

     @property
     def priority(self):
@@ -527,7 +590,7 @@ def auth_required(self):

     @property
     def log(self):
-        if self._log is None:
+        if getattr(self, "_log", None) is None:
             self._log = logging.getLogger(f"bbot.modules.{self.name}")
         return self._log
@@ -560,20 +623,21 @@ def hugesuccess(self, *args, **kwargs):

     def warning(self, *args, **kwargs):
         self.log.warning(*args, extra={"scan_id": self.scan.id}, **kwargs)
-        self._log_traceback()
+        self.trace()

     def hugewarning(self, *args, **kwargs):
         self.log.hugewarning(*args, extra={"scan_id": self.scan.id}, **kwargs)
-        self._log_traceback()
+        self.trace()

     def error(self, *args, **kwargs):
         self.log.error(*args, extra={"scan_id": self.scan.id}, **kwargs)
-        self._log_traceback()
+        self.trace()

-    def critical(self, *args, **kwargs):
-        self.log.critical(*args, extra={"scan_id": self.scan.id}, **kwargs)
-
-    def _log_traceback(self):
+    def trace(self):
         e_type, e_val, e_traceback = exc_info()
         if e_type is not None:
-            self.debug(traceback.format_exc())
+            self.log.trace(traceback.format_exc())
+
+    def critical(self, *args, **kwargs):
+        self.log.critical(*args, extra={"scan_id": self.scan.id}, **kwargs)
+        self.trace()
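The base-module changes above split event filtering into a cheap precheck at enqueue time and a heavier postcheck that runs only when the event is pulled off the queue, with a `PriorityQueue` replacing the old `SimpleQueue`. A minimal sketch of that pattern, independent of BBOT's actual classes (all names here are hypothetical, and events are plain dicts for brevity):

~~~python
import itertools
import queue

class Worker:
    """Toy version of precheck-on-enqueue / postcheck-on-dequeue."""

    def __init__(self, watched_types):
        self.watched_types = set(watched_types)
        self._counter = itertools.count()  # tie-breaker so queued events are never compared directly
        self.incoming = queue.PriorityQueue()

    def precheck(self, event):
        # cheap, producer-side checks only
        if event["type"] not in self.watched_types:
            return False, "its type is not in watched_events"
        return True, ""

    def postcheck(self, event):
        # potentially expensive checks, deferred until just before handling
        if event.get("scope_distance", 0) > 0:
            return False, "out of scope"
        return True, ""

    def queue_event(self, event, priority=5):
        ok, _ = self.precheck(event)
        if ok:
            self.incoming.put((priority, next(self._counter), event))

    def handle_next(self):
        _, _, event = self.incoming.get()
        ok, reason = self.postcheck(event)
        return event if ok else None
~~~

Deferring the expensive checks keeps producers fast, and the priority queue lets high-priority modules' output jump the line.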
diff --git a/bbot/modules/bevigil.py b/bbot/modules/bevigil.py
index 809fe74767..d81a081a91 100644
--- a/bbot/modules/bevigil.py
+++ b/bbot/modules/bevigil.py
@@ -39,11 +39,11 @@ def handle_event(self, event):

     def request_subdomains(self, query):
         url = f"{self.base_url}/{self.helpers.quote(query)}/subdomains/"
-        return self.helpers.request(url, headers=self.headers)
+        return self.request_with_fail_count(url, headers=self.headers)

     def request_urls(self, query):
         url = f"{self.base_url}/{self.helpers.quote(query)}/urls/"
-        return self.helpers.request(url, headers=self.headers)
+        return self.request_with_fail_count(url, headers=self.headers)

     def parse_subdomains(self, r, query=None):
         results = set()
diff --git a/bbot/modules/binaryedge.py b/bbot/modules/binaryedge.py
index 1ddc16721c..7bcb266806 100644
--- a/bbot/modules/binaryedge.py
+++ b/bbot/modules/binaryedge.py
@@ -21,13 +21,13 @@ def setup(self):

     def ping(self):
         url = f"{self.base_url}/user/subscription"
-        j = self.helpers.request(url, headers=self.headers).json()
+        j = self.request_with_fail_count(url, headers=self.headers).json()
         assert j.get("requests_left", 0) > 0

     def request_url(self, query):
         # todo: host query (certs + services)
         url = f"{self.base_url}/query/domains/subdomain/{self.helpers.quote(query)}"
-        return self.helpers.request(url, headers=self.headers)
+        return self.request_with_fail_count(url, headers=self.headers)

     def parse_results(self, r, query):
         j = r.json()
diff --git a/bbot/modules/bucket_aws.py b/bbot/modules/bucket_aws.py
index aff54e1b52..a831e20df0 100644
--- a/bbot/modules/bucket_aws.py
+++ b/bbot/modules/bucket_aws.py
@@ -4,7 +4,7 @@ class bucket_aws(BaseModule):

     watched_events = ["DNS_NAME", "STORAGE_BUCKET"]
     produced_events = ["STORAGE_BUCKET", "FINDING"]
-    flags = ["active", "safe", "cloud-enum"]
+    flags = ["active", "safe", "cloud-enum", "web-basic", "web-thorough"]
     meta = {"description": "Check for S3 buckets related to target"}
     options = {"max_threads": 10, "permutations": False}
     options_desc = {
diff --git a/bbot/modules/bucket_azure.py b/bbot/modules/bucket_azure.py
index 093a45adcc..d138cdece9 100644
--- a/bbot/modules/bucket_azure.py
+++ b/bbot/modules/bucket_azure.py
@@ -4,7 +4,7 @@ class bucket_azure(bucket_aws):

     watched_events = ["DNS_NAME", "STORAGE_BUCKET"]
     produced_events = ["STORAGE_BUCKET", "FINDING"]
-    flags = ["active", "safe", "cloud-enum"]
+    flags = ["active", "safe", "cloud-enum", "web-basic", "web-thorough"]
     meta = {"description": "Check for Azure storage blobs related to target"}
     options = {"max_threads": 10, "permutations": False}
     options_desc = {
diff --git a/bbot/modules/bucket_digitalocean.py b/bbot/modules/bucket_digitalocean.py
index 8fd55a258d..7f59d7ee56 100644
--- a/bbot/modules/bucket_digitalocean.py
+++ b/bbot/modules/bucket_digitalocean.py
@@ -4,7 +4,7 @@ class bucket_digitalocean(bucket_aws):

     watched_events = ["DNS_NAME", "STORAGE_BUCKET"]
     produced_events = ["STORAGE_BUCKET", "FINDING"]
-    flags = ["active", "safe", "cloud-enum"]
+    flags = ["active", "safe", "cloud-enum", "web-basic", "web-thorough"]
     meta = {"description": "Check for DigitalOcean spaces related to target"}
     options = {"max_threads": 10, "permutations": False}
     options_desc = {
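The API modules above now route their HTTP calls through the new `request_with_fail_count()` from the base module, which trips the module's error state after too many consecutive failures. The same circuit-breaker idea in a standalone sketch (the threshold value and class name are illustrative, using `requests` in place of BBOT's helpers):

~~~python
import requests

class FailCounter:
    """Disable a client after N consecutive failed requests (sketch)."""

    def __init__(self, threshold=5):
        self.threshold = threshold
        self.failures = 0
        self.errored = False

    def request(self, url, **kwargs):
        if self.errored:
            return None
        try:
            r = requests.get(url, timeout=10, **kwargs)
            self.failures = 0  # any success resets the streak
            return r
        except requests.RequestException:
            self.failures += 1
            if self.failures >= self.threshold:
                self.errored = True  # equivalent to set_error_state()
            return None
~~~

Resetting the counter on success means only an unbroken run of failures (a dead API, revoked key, hard rate limit) disables the module.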
{"description": "Check for Google object storage related to target"} options = {"max_threads": 10, "permutations": False} options_desc = { diff --git a/bbot/modules/builtwith.py b/bbot/modules/builtwith.py index 4d989ddf2c..8a920f6a24 100644 --- a/bbot/modules/builtwith.py +++ b/bbot/modules/builtwith.py @@ -14,7 +14,6 @@ class builtwith(shodan_dns): - watched_events = ["DNS_NAME"] produced_events = ["DNS_NAME"] flags = ["affiliates", "subdomain-enum", "passive", "safe"] @@ -45,11 +44,11 @@ def handle_event(self, event): def request_domains(self, query): url = f"{self.base_url}/v20/api.json?KEY={self.api_key}&LOOKUP={query}&NOMETA=yes&NOATTR=yes&HIDETEXT=yes&HIDEDL=yes" - return self.helpers.request(url) + return self.request_with_fail_count(url) def request_redirects(self, query): url = f"{self.base_url}/redirect1/api.json?KEY={self.api_key}&LOOKUP={query}" - return self.helpers.request(url) + return self.request_with_fail_count(url) def parse_domains(self, r, query): """ diff --git a/bbot/modules/bypass403.py b/bbot/modules/bypass403.py index cf017f69c6..e459abe26e 100644 --- a/bbot/modules/bypass403.py +++ b/bbot/modules/bypass403.py @@ -70,15 +70,13 @@ class bypass403(BaseModule): - watched_events = ["URL"] produced_events = ["FINDING"] - flags = ["active", "aggressive", "web-advanced"] + flags = ["active", "aggressive", "web-thorough"] meta = {"description": "Check 403 pages for common bypasses"} in_scope_only = True def handle_event(self, event): - try: compare_helper = self.helpers.http_compare(event.data, allow_redirects=True) except HttpCompareError as e: @@ -86,7 +84,6 @@ def handle_event(self, event): return for sig in signatures: - sig = self.format_signature(sig, event) if sig[2] != None: headers = dict(sig[2]) @@ -98,7 +95,6 @@ def handle_event(self, event): if match == False: if str(subject_response.status_code)[0] != "4": - if sig[2]: added_header_tuple = next(iter(sig[2].items())) reported_signature = f"Added Header: {added_header_tuple[0]}: {added_header_tuple[1]}" diff --git a/bbot/modules/c99.py b/bbot/modules/c99.py index 66f66e6950..7fde17dcd9 100644 --- a/bbot/modules/c99.py +++ b/bbot/modules/c99.py @@ -13,12 +13,12 @@ class c99(shodan_dns): def ping(self): url = f"{self.base_url}/randomnumber?key={self.api_key}&between=1,100&json" - response = self.helpers.request(url) + response = self.request_with_fail_count(url) assert response.json()["success"] == True def request_url(self, query): url = f"{self.base_url}/subdomainfinder?key={self.api_key}&domain={self.helpers.quote(query)}&json" - return self.helpers.request(url) + return self.request_with_fail_count(url) def parse_results(self, r, query): j = r.json() diff --git a/bbot/modules/certspotter.py b/bbot/modules/certspotter.py index 1606b54dc4..5e928fc4be 100644 --- a/bbot/modules/certspotter.py +++ b/bbot/modules/certspotter.py @@ -11,7 +11,7 @@ class certspotter(crobat): def request_url(self, query): url = f"{self.base_url}/issuances?domain={self.helpers.quote(query)}&include_subdomains=true&expand=dns_names" - return self.helpers.request(url) + return self.request_with_fail_count(url) def parse_results(self, r, query): json = r.json() diff --git a/bbot/modules/crobat.py b/bbot/modules/crobat.py index 58e133d82c..4be69b1ffe 100644 --- a/bbot/modules/crobat.py +++ b/bbot/modules/crobat.py @@ -7,19 +7,25 @@ class crobat(BaseModule): Inherited by several other modules including sublist3r, dnsdumpster, etc. 
""" - flags = ["subdomain-enum", "passive", "safe"] watched_events = ["DNS_NAME"] produced_events = ["DNS_NAME"] + # tag "subdomain-enum" removed 2023-02-24 because API is offline + flags = ["passive", "safe"] meta = {"description": "Query Project Crobat for subdomains"} base_url = "https://sonar.omnisint.io" - + # set module error state after this many failed requests in a row + abort_after_failures = 5 + # whether to reject wildcard DNS_NAMEs + reject_wildcards = True # this helps combat rate limiting by ensuring that a query doesn't execute # until the queue is ready to receive its results _qsize = 1 def setup(self): self.processed = set() + self.http_timeout = self.scan.config.get("http_timeout", 10) + self._failures = 0 return True def filter_event(self, event): @@ -31,18 +37,19 @@ def filter_event(self, event): This filter_event is used across many modules """ - if "unresolved" in event.tags: - return False query = self.make_query(event) if self.already_processed(query): - return False - # discard dns-names with errors - if any([t in event.tags for t in ("a-error", "aaaa-error")]): - return False - # discard wildcards - wildcard_rdtypes = self.helpers.is_wildcard_domain(query) - if any([t in wildcard_rdtypes for t in ("A", "AAAA")]): - return False + return False, "Event was already processed" + if not "target" in event.tags: + if "unresolved" in event.tags: + return False, "Event is unresolved" + if any(t.startswith("cloud-") for t in event.tags): + return False, "Event is a cloud resource and not a direct target" + if self.reject_wildcards: + if any(t in event.tags for t in ("a-wildcard-domain", "aaaa-wildcard-domain", "cname-wildcard-domain")): + return False, "Event is a wildcard domain" + if any(t in event.tags for t in ("a-error", "aaaa-error")): + return False, "Event has a DNS resolution error" self.processed.add(hash(query)) return True @@ -54,19 +61,25 @@ def already_processed(self, hostname): def abort_if(self, event): # this helps weed out unwanted results when scanning IP_RANGES and wildcard domains - return "in-scope" not in event.tags or "wildcard" in event.tags + if "in-scope" not in event.tags: + return True + if any(t in event.tags for t in ("wildcard", "wildcard-domain")): + return True + return False def handle_event(self, event): query = self.make_query(event) results = self.query(query) if results: - for hostname in results: - if not hostname == event: - self.emit_event(hostname, "DNS_NAME", event, abort_if=self.abort_if) + for hostname in set(results): + if hostname: + hostname = hostname.lower() + if hostname.endswith(f".{query}") and not hostname == event.data: + self.emit_event(hostname, "DNS_NAME", event, abort_if=self.abort_if) def request_url(self, query): url = f"{self.base_url}/subdomains/{self.helpers.quote(query)}" - return self.helpers.request(url) + return self.request_with_fail_count(url) def make_query(self, event): if "target" in event.tags: @@ -92,7 +105,5 @@ def query(self, query, parse_fn=None, request_fn=None): return results self.debug(f'No results for "{query}"') except Exception: - import traceback - self.verbose(f"Error retrieving results for {query}") - self.debug(traceback.format_exc()) + self.trace() diff --git a/bbot/modules/crt.py b/bbot/modules/crt.py index dc18951c3a..d474241f77 100644 --- a/bbot/modules/crt.py +++ b/bbot/modules/crt.py @@ -2,13 +2,13 @@ class crt(crobat): - flags = ["subdomain-enum", "passive", "safe"] watched_events = ["DNS_NAME"] produced_events = ["DNS_NAME"] meta = 
{"description": "Query crt.sh (certificate transparency) for subdomains"} base_url = "https://crt.sh" + reject_wildcards = False def setup(self): self.cert_ids = set() @@ -17,7 +17,7 @@ def setup(self): def request_url(self, query): params = {"q": query, "output": "json"} url = self.helpers.add_get_params(self.base_url, params).geturl() - return self.helpers.request(url) + return self.request_with_fail_count(url, timeout=self.http_timeout + 10) def parse_results(self, r, query): j = r.json() diff --git a/bbot/modules/deadly/ffuf.py b/bbot/modules/deadly/ffuf.py index dfe7b33428..3bf30f8f81 100644 --- a/bbot/modules/deadly/ffuf.py +++ b/bbot/modules/deadly/ffuf.py @@ -7,10 +7,9 @@ class ffuf(BaseModule): - watched_events = ["URL"] - produced_events = ["URL"] - flags = ["brute-force", "aggressive", "active", "web-advanced"] + produced_events = ["URL_UNVERIFIED"] + flags = ["aggressive", "active"] meta = {"description": "A fast web fuzzer written in Go"} options = { @@ -19,6 +18,7 @@ class ffuf(BaseModule): "max_depth": 0, "version": "1.5.0", "extensions": "", + "ignore_redirects": True, } options_desc = { @@ -27,6 +27,7 @@ class ffuf(BaseModule): "max_depth": "the maxium directory depth to attempt to solve", "version": "ffuf version", "extensions": "Optionally include a list of extensions to extend the keyword with (comma separated)", + "ignore_redirects": "Explicitly ignore redirects (301,302)", } blacklist = ["images", "css", "image"] @@ -35,7 +36,7 @@ class ffuf(BaseModule): { "name": "Download ffuf", "unarchive": { - "src": "https://github.com/ffuf/ffuf/releases/download/v#{BBOT_MODULES_FFUF_VERSION}/ffuf_#{BBOT_MODULES_FFUF_VERSION}_linux_amd64.tar.gz", + "src": "https://github.com/ffuf/ffuf/releases/download/v#{BBOT_MODULES_FFUF_VERSION}/ffuf_#{BBOT_MODULES_FFUF_VERSION}_#{BBOT_OS}_#{BBOT_CPU_ARCH}.tar.gz", "include": "ffuf", "dest": "#{BBOT_TOOLS}", "remote_src": True, @@ -46,12 +47,17 @@ class ffuf(BaseModule): in_scope_only = True def setup(self): - self.sanity_canary = "".join(random.choice(string.ascii_lowercase) for i in range(10)) wordlist_url = self.config.get("wordlist", "") + self.debug(f"Using wordlist [{wordlist_url}]") self.wordlist = self.helpers.wordlist(wordlist_url) - self.tempfile = self.generate_templist(self.wordlist) + f = open(self.wordlist, "r") + self.wordlist_lines = f.readlines() + f.close() + self.tempfile, tempfile_len = self.generate_templist() + self.verbose(f"Generated dynamic wordlist with length [{str(tempfile_len)}]") self.extensions = self.config.get("extensions") + self.ignore_redirects = self.config.get("ignore_redirects") return True def handle_event(self, event): @@ -67,19 +73,18 @@ def handle_event(self, event): # if we think its a directory, normalize it. 
         fixed_url = event.data.rstrip("/") + "/"

-        for r in self.execute_ffuf(self.tempfile, event, fixed_url):
-            self.emit_event(r["url"], "URL", source=event, tags=[f"status-{r['status']}"])
-
-    def execute_ffuf(self, tempfile, event, url, suffix=""):
+        for r in self.execute_ffuf(self.tempfile, fixed_url):
+            self.emit_event(r["url"], "URL_UNVERIFIED", source=event, tags=[f"status-{r['status']}"])

-        ffuf_exts = [""]
+    def execute_ffuf(self, tempfile, url, prefix="", suffix=""):
+        ffuf_exts = ["", "/"]
         if self.extensions:
             for ext in self.extensions.split(","):
                 ffuf_exts.append(f".{ext}")

         for x in ffuf_exts:
-            fuzz_url = f"{url}FUZZ{suffix}"
+            fuzz_url = f"{url}{prefix}FUZZ{suffix}"
             command = [
                 "ffuf",
                 "-H",
@@ -93,6 +98,10 @@ def execute_ffuf(self, tempfile, event, url, suffix=""):
                 f"{fuzz_url}{x}",
             ]

+            if self.ignore_redirects:
+                command.append("-fc")
+                command.append("301,302")
+
             for found in self.helpers.run_live(command):
                 try:
                     found_json = json.loads(found)
@@ -114,22 +123,19 @@ def execute_ffuf(self, tempfile, event, url, suffix=""):
                 except json.decoder.JSONDecodeError:
                     self.debug("Received invalid JSON from FFUF")

-    def generate_templist(self, wordlist, prefix=None):
-
-        f = open(wordlist, "r")
-        fl = f.readlines()
-        f.close()
+    def generate_templist(self, prefix=None):
+        line_count = 0

         virtual_file = []
         virtual_file.append(self.sanity_canary)
-        for idx, val in enumerate(fl):
+        for idx, val in enumerate(self.wordlist_lines):
             if idx > self.config.get("lines"):
                 break
             if len(val) > 0:
-
                 if val.strip().lower() in self.blacklist:
                     self.debug(f"Skipping adding [{val.strip()}] to wordlist because it was in the blacklist")
                 else:
-                    if not prefix or val.startswith(prefix):
-                        virtual_file.append(f"{val.strip()}")
-        return self.helpers.tempfile(virtual_file, pipe=False)
+                    if not prefix or val.strip().lower().startswith(prefix.strip().lower()):
+                        line_count += 1
+                        virtual_file.append(f"{val.strip().lower()}")
+        return self.helpers.tempfile(virtual_file, pipe=False), line_count
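`generate_templist()` now reads the wordlist once in `setup()` and filters the cached lines by prefix per run, returning the line count so callers can skip empty lists. A self-contained version of that filtering (the sample wordlist and blacklist are placeholders):

~~~python
def generate_templist(wordlist_lines, prefix=None, max_lines=1000, blacklist=()):
    """Filter a cached wordlist down to lowercase entries matching a prefix."""
    prefix = (prefix or "").strip().lower()
    out = []
    for idx, val in enumerate(wordlist_lines):
        if idx > max_lines:
            break
        val = val.strip().lower()
        if not val or val in blacklist:
            continue
        if not prefix or val.startswith(prefix):
            out.append(val)
    return out, len(out)

words, count = generate_templist(["Admin\n", "backup\n", "images\n"],
                                 prefix="ba", blacklist={"images"})
assert words == ["backup"] and count == 1
~~~

Returning the count alongside the file is what lets the shortname module later guard with `if tempfile_len > 0:` instead of launching ffuf against an empty list.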
"-update-template-dir", self.nuclei_templates_dir, "-update-templates"] ) if update_results.stderr: if "Successfully downloaded nuclei-templates" in update_results.stderr: @@ -116,12 +114,9 @@ def setup(self): f"Template Severity: Critical [{self.nucleibudget.severity_stats['critical']}] High [{self.nucleibudget.severity_stats['high']}] Medium [{self.nucleibudget.severity_stats['medium']}] Low [{self.nucleibudget.severity_stats['low']}] Info [{self.nucleibudget.severity_stats['info']}] Unknown [{self.nucleibudget.severity_stats['unknown']}]" ) - self.stats_file = self.helpers.tempfile_tail(callback=self.log_nuclei_status) - return True def handle_batch(self, *events): - nuclei_input = [str(e.data) for e in events] for severity, template, host, name, extracted_results in self.execute_nuclei(nuclei_input): source_event = self.correlate_event(events, host) @@ -161,11 +156,10 @@ def correlate_event(self, events, host): self.warning("Failed to correlate nuclei result with event") def execute_nuclei(self, nuclei_input): - command = [ "nuclei", "-json", - "-update-directory", + "-update-template-dir", self.nuclei_templates_dir, "-rate-limit", self.ratelimit, @@ -195,35 +189,39 @@ def execute_nuclei(self, nuclei_input): command.append("-t") command.append(self.budget_templates_file) - with open(self.stats_file, "w") as stats_file: - for line in self.helpers.run_live(command, input=nuclei_input, stderr=stats_file): - try: - j = json.loads(line) - except json.decoder.JSONDecodeError: - self.debug(f"Failed to decode line: {line}") - continue - - template = j.get("template-id", "") - - # try to get the specific matcher name - name = j.get("matcher-name", "") - - # fall back to regular name - if not name: - self.debug( - f"Couldn't get matcher-name from nuclei json, falling back to regular name. Template: [{template}]" - ) - name = j.get("info", {}).get("name", "") - - severity = j.get("info", {}).get("severity", "").upper() - host = j.get("host", "") - - extracted_results = j.get("extracted-results", []) - - if template and name and severity and host: - yield (severity, template, host, name, extracted_results) - else: - self.debug("Nuclei result missing one or more required elements, not reporting. JSON: ({j})") + stats_file = self.helpers.tempfile_tail(callback=self.log_nuclei_status) + try: + with open(stats_file, "w") as stats_fh: + for line in self.helpers.run_live(command, input=nuclei_input, stderr=stats_fh): + try: + j = json.loads(line) + except json.decoder.JSONDecodeError: + self.debug(f"Failed to decode line: {line}") + continue + + template = j.get("template-id", "") + + # try to get the specific matcher name + name = j.get("matcher-name", "") + + # fall back to regular name + if not name: + self.debug( + f"Couldn't get matcher-name from nuclei json, falling back to regular name. Template: [{template}]" + ) + name = j.get("info", {}).get("name", "") + + severity = j.get("info", {}).get("severity", "").upper() + host = j.get("host", "") + + extracted_results = j.get("extracted-results", []) + + if template and name and severity and host: + yield (severity, template, host, name, extracted_results) + else: + self.debug("Nuclei result missing one or more required elements, not reporting. 
+        finally:
+            stats_file.unlink()

     def log_nuclei_status(self, line):
         try:
@@ -297,7 +295,6 @@ def find_collapsable_templates(self):
                     if yf:
                         for paths in self.get_yaml_request_attr(yf, "path"):
                             if set(paths).issubset(self.budget_paths):
-
                                 headers = self.get_yaml_request_attr(yf, "headers")
                                 for header in headers:
                                     if header:
diff --git a/bbot/modules/deadly/vhost.py b/bbot/modules/deadly/vhost.py
index aa301d1c9f..1b7c25c140 100644
--- a/bbot/modules/deadly/vhost.py
+++ b/bbot/modules/deadly/vhost.py
@@ -2,10 +2,9 @@


 class vhost(BaseModule):
-
     watched_events = ["URL"]
     produced_events = ["VHOST", "DNS_NAME"]
-    flags = ["active", "brute-force", "aggressive", "slow", "web-advanced"]
+    flags = ["active", "aggressive", "slow"]
     meta = {"description": "Fuzz for virtual hosts"}

     special_vhost_list = ["127.0.0.1", "localhost", "host.docker.internal"]
@@ -21,7 +20,7 @@ class vhost(BaseModule):
         {
             "name": "Download ffuf",
             "unarchive": {
-                "src": "https://github.com/ffuf/ffuf/releases/download/v#{BBOT_MODULES_FFUF_VERSION}/ffuf_#{BBOT_MODULES_FFUF_VERSION}_linux_amd64.tar.gz",
+                "src": "https://github.com/ffuf/ffuf/releases/download/v#{BBOT_MODULES_FFUF_VERSION}/ffuf_#{BBOT_MODULES_FFUF_VERSION}_#{BBOT_OS}_#{BBOT_CPU_ARCH}.tar.gz",
                 "include": "ffuf",
                 "dest": "#{BBOT_TOOLS}",
                 "remote_src": True,
diff --git a/bbot/modules/dnscommonsrv.py b/bbot/modules/dnscommonsrv.py
index b2aa44a409..8333e8293b 100644
--- a/bbot/modules/dnscommonsrv.py
+++ b/bbot/modules/dnscommonsrv.py
@@ -106,4 +106,4 @@ def handle_event(self, event):
         queries = [event.data] + [f"{srv}.{event.data}" for srv in common_srvs]
         for query, results in self.helpers.resolve_batch(queries, type="srv"):
             if results:
-                self.emit_event(query, "DNS_NAME", tags=["srv_record"], source=event)
+                self.emit_event(query, "DNS_NAME", tags=["srv-record"], source=event)
diff --git a/bbot/modules/dnsdumpster.py b/bbot/modules/dnsdumpster.py
index 45eb4591ed..82ce316bb2 100644
--- a/bbot/modules/dnsdumpster.py
+++ b/bbot/modules/dnsdumpster.py
@@ -10,14 +10,14 @@ class dnsdumpster(crobat):
     flags = ["subdomain-enum", "passive", "safe"]
     meta = {"description": "Query dnsdumpster for subdomains"}

-    deps_pip = ["beautifulsoup4", "lxml"]
+    deps_pip = ["bs4", "lxml"]

     base_url = "https://dnsdumpster.com"

     def query(self, domain):
         ret = []
         # first, get the CSRF tokens
-        res1 = self.helpers.request(self.base_url)
+        res1 = self.request_with_fail_count(self.base_url)
         status_code = getattr(res1, "status_code", 0)
         if status_code in [429]:
             self.verbose(f'Too many requests "{status_code}"')
@@ -56,7 +56,7 @@ def query(self, domain):

         # Otherwise, do the needful
         subdomains = set()
-        res2 = self.helpers.request(
+        res2 = self.request_with_fail_count(
             f"{self.base_url}/",
             method="POST",
             cookies={"csrftoken": csrftoken},
diff --git a/bbot/modules/dnszonetransfer.py b/bbot/modules/dnszonetransfer.py
index cfda3b24e0..6e56284f92 100644
--- a/bbot/modules/dnszonetransfer.py
+++ b/bbot/modules/dnszonetransfer.py
@@ -5,7 +5,6 @@


 class dnszonetransfer(BaseModule):
-
     flags = ["subdomain-enum", "active", "safe"]
     watched_events = ["DNS_NAME"]
     produced_events = ["DNS_NAME"]
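The nuclei change above moves the stats file from a scan-long attribute to a per-execution temp file that is removed in a `finally` block. The same create-use-unlink lifecycle in plain Python (BBOT's `tempfile_tail` helper, which tails the file and fires a callback per line, is stubbed out here with an ordinary temp file):

~~~python
import os
import tempfile
from pathlib import Path

def run_scan(command_output):
    # create the stats file for this run only
    fd, name = tempfile.mkstemp(suffix=".stats")
    os.close(fd)
    stats_file = Path(name)
    results = []
    try:
        with open(stats_file, "w") as fh:
            for line in command_output:
                fh.write(line + "\n")  # stderr/progress output would land here
                results.append(line)
    finally:
        stats_file.unlink()  # removed even if parsing raises mid-run
    return results

assert run_scan(["finding-1", "finding-2"]) == ["finding-1", "finding-2"]
~~~

Scoping the file to one execution avoids leaking temp files when a batch errors out partway through.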
f"{self.base_url}/d/{self.helpers.quote(query)}/" - r = self.helpers.request(url) + r = self.request_with_fail_count(url) if not r: return for email in self.extract_emails(r.text): diff --git a/bbot/modules/ffuf_shortnames.py b/bbot/modules/ffuf_shortnames.py index 525adaa97c..d3d469654f 100644 --- a/bbot/modules/ffuf_shortnames.py +++ b/bbot/modules/ffuf_shortnames.py @@ -1,30 +1,61 @@ +import re import random import string from bbot.modules.deadly.ffuf import ffuf -class ffuf_shortnames(ffuf): +def find_common_prefixes(strings, minimum_set_length=4): + prefix_candidates = [s[:i] for s in strings if len(s) == 6 for i in range(3, 6)] + frequency_dict = {item: prefix_candidates.count(item) for item in prefix_candidates} + frequency_dict = {k: v for k, v in frequency_dict.items() if v >= minimum_set_length} + prefix_list = list(set(frequency_dict.keys())) + + found_prefixes = set() + for prefix in prefix_list: + prefix_frequency = frequency_dict[prefix] + is_substring = False + + for k, v in frequency_dict.items(): + if prefix != k: + if prefix in k: + is_substring = True + if not is_substring: + found_prefixes.add(prefix) + else: + if prefix_frequency > v and (len(k) - len(prefix) == 1): + found_prefixes.add(prefix) + return list(found_prefixes) + +class ffuf_shortnames(ffuf): watched_events = ["URL_HINT"] - produced_events = ["URL"] - flags = ["brute-force", "aggressive", "active", "web-advanced", "iis-shortnames"] + produced_events = ["URL_UNVERIFIED"] + flags = ["aggressive", "active", "iis-shortnames", "web-thorough"] meta = {"description": "Use ffuf in combination IIS shortnames"} options = { - "wordlist": "https://raw.githubusercontent.com/danielmiessler/SecLists/master/Discovery/Web-Content/raft-large-words.txt", - "lines": 20000, + "wordlist": "", # default is defined within setup function + "wordlist_extensions": "", # default is defined within setup function + "lines": 1000000, "max_depth": 1, "version": "1.5.0", "extensions": "", + "ignore_redirects": True, + "find_common_prefixes": False, + "find_delimeters": True, } options_desc = { "wordlist": "Specify wordlist to use when finding directories", + "wordlist_extensions": "Specify wordlist to use when making extension lists", "lines": "take only the first N lines from the wordlist when finding directories", "max_depth": "the maxium directory depth to attempt to solve", "version": "ffuf version", "extensions": "Optionally include a list of extensions to extend the keyword with (comma separated)", + "ignore_redirects": "Explicitly ignore redirects (301,302)", + "find_common_prefixes": "Attempt to automatically detect common prefixes and make additional ffuf runs against them", + "find_delimeters": "Attempt to detect common delimeters and make additional ffuf runs against them", } in_scope_only = True @@ -33,7 +64,7 @@ class ffuf_shortnames(ffuf): { "name": "Download ffuf", "unarchive": { - "src": "https://github.com/ffuf/ffuf/releases/download/v#{BBOT_MODULES_FFUF_VERSION}/ffuf_#{BBOT_MODULES_FFUF_VERSION}_linux_amd64.tar.gz", + "src": "https://github.com/ffuf/ffuf/releases/download/v#{BBOT_MODULES_FFUF_VERSION}/ffuf_#{BBOT_MODULES_FFUF_VERSION}_#{BBOT_OS_PLATFORM}_#{BBOT_CPU_ARCH}.tar.gz", "include": "ffuf", "dest": "#{BBOT_TOOLS}", "remote_src": True, @@ -41,46 +72,154 @@ class ffuf_shortnames(ffuf): } ] - extension_helper = { - "asp": ["aspx"], - "asm": ["asmx"], - "ash": ["ashx"], - "jsp": ["jspx"], - "htm": ["html"], - 
"sht": ["shtml"], - "php": ["php2", "php3", "php4", "ph5"], - } - def setup(self): self.sanity_canary = "".join(random.choice(string.ascii_lowercase) for i in range(10)) wordlist = self.config.get("wordlist", "") + if not wordlist: + wordlist = f"{self.helpers.wordlist_dir}/ffuf_shortname_candidates.txt" + self.debug(f"Using [{wordlist}] for shortname candidate list") self.wordlist = self.helpers.wordlist(wordlist) + f = open(self.wordlist, "r") + self.wordlist_lines = f.readlines() + f.close() + + wordlist_extensions = self.config.get("wordlist_extensions", "") + if not wordlist_extensions: + wordlist_extensions = f"{self.helpers.wordlist_dir}/raft-small-extensions-lowercase_CLEANED.txt" + self.debug(f"Using [{wordlist_extensions}] for shortname candidate extension list") + self.wordlist_extensions = self.helpers.wordlist(wordlist_extensions) self.extensions = self.config.get("extensions") - return True + self.ignore_redirects = self.config.get("ignore_redirects") - def handle_event(self, event): - - filename_hint = event.parsed.path.rsplit(".", 1)[0].split("/")[-1] - - tempfile = self.generate_templist(self.wordlist, prefix=filename_hint) - - root_stub = "/".join(event.parsed.path.split("/")[:-1]) - root_url = f"{event.parsed.scheme}://{event.parsed.netloc}{root_stub}/" - - if "file" in event.tags: - extension_hint = event.parsed.path.rsplit(".", 1)[1] - used_extensions = [] - used_extensions.append(extension_hint) - for ex in self.extension_helper.keys(): - if extension_hint == ex: - for ex2 in self.extension_helper[ex]: - used_extensions.append(ex2) - - for ext in used_extensions: - for r in self.execute_ffuf(tempfile, event, root_url, suffix=f".{ext}"): - self.emit_event(r["url"], "URL", source=event, tags=[f"status-{r['status']}"]) + self.per_host_collection = {} + self.shortname_to_event = {} + return True - elif "dir" in event.tags: + def build_extension_list(self, event): + used_extensions = [] + extension_hint = event.parsed.path.rsplit(".", 1)[1].lower().strip() + with open(self.wordlist_extensions) as f: + for l in f: + l = l.lower().lstrip(".") + if l.lower().startswith(extension_hint): + used_extensions.append(l.strip()) + + return used_extensions + + def find_delimeter(self, hint): + delimeters = ["_", "-"] + for d in delimeters: + if d in hint: + if not hint.startswith(d) and not hint.endswith(d): + return d, hint.split(d)[0], hint.split(d)[1] + return None - for r in self.execute_ffuf(tempfile, event, root_url): - self.emit_event(r["url"], "URL", source=event, tags=[f"status-{r['status']}"]) + def handle_event(self, event): + if event.source.type == "URL": + filename_hint = re.sub(r"~\d", "", event.parsed.path.rsplit(".", 1)[0].split("/")[-1]).lower() + + host = f"{event.source.parsed.scheme}://{event.source.parsed.netloc}/" + if host not in self.per_host_collection.keys(): + self.per_host_collection[host] = [(filename_hint, event.source.data)] + + else: + self.per_host_collection[host].append((filename_hint, event.source.data)) + + self.shortname_to_event[filename_hint] = event + + root_stub = "/".join(event.parsed.path.split("/")[:-1]) + root_url = f"{event.parsed.scheme}://{event.parsed.netloc}{root_stub}/" + + if "shortname-file" in event.tags: + used_extensions = self.build_extension_list(event) + + if len(filename_hint) == 6: + tempfile, tempfile_len = self.generate_templist(prefix=filename_hint) + self.verbose( + f"generated temp word list of size [{str(tempfile_len)}] for filename hint: [{filename_hint}]" + ) + + else: + tempfile = 
self.helpers.tempfile([filename_hint], pipe=False) + tempfile_len = 1 + + if tempfile_len > 0: + if "shortname-file" in event.tags: + for ext in used_extensions: + for r in self.execute_ffuf(tempfile, root_url, suffix=f".{ext}"): + self.emit_event(r["url"], "URL_UNVERIFIED", source=event, tags=[f"status-{r['status']}"]) + + elif "shortname-directory" in event.tags: + for r in self.execute_ffuf(tempfile, root_url): + self.emit_event(r["url"], "URL_UNVERIFIED", source=event, tags=[f"status-{r['status']}"]) + + if self.config.get("find_delimeters"): + if "shortname-directory" in event.tags: + delimeter_r = self.find_delimeter(filename_hint) + if delimeter_r: + delimeter, prefix, partial_hint = delimeter_r + self.verbose(f"Detected delimeter [{delimeter}] in hint [{filename_hint}]") + tempfile, tempfile_len = self.generate_templist(prefix=partial_hint) + for r in self.execute_ffuf(tempfile, root_url, prefix=f"{prefix}{delimeter}"): + self.emit_event(r["url"], "URL_UNVERIFIED", source=event, tags=[f"status-{r['status']}"]) + + elif "shortname-file" in event.tags: + for ext in used_extensions: + delimeter_r = self.find_delimeter(filename_hint) + if delimeter_r: + delimeter, prefix, partial_hint = delimeter_r + self.verbose(f"Detected delimeter [{delimeter}] in hint [{filename_hint}]") + tempfile, tempfile_len = self.generate_templist(prefix=partial_hint) + for r in self.execute_ffuf( + tempfile, root_url, prefix=f"{prefix}{delimeter}", suffix=f".{ext}" + ): + self.emit_event( + r["url"], "URL_UNVERIFIED", source=event, tags=[f"status-{r['status']}"] + ) + + def finish(self): + if self.config.get("find_common_prefixes"): + per_host_collection = dict(self.per_host_collection) + self.per_host_collection.clear() + + for host, hint_tuple_list in per_host_collection.items(): + hint_list = [x[0] for x in hint_tuple_list] + + common_prefixes = find_common_prefixes(hint_list) + for prefix in common_prefixes: + self.verbose(f"Found common prefix: [{prefix}] for host [{host}]") + for hint_tuple in hint_tuple_list: + hint, url = hint_tuple + if hint.startswith(prefix): + partial_hint = hint[len(prefix) :] + + # safeguard to prevent loading the entire wordlist + if len(partial_hint) > 0: + tempfile, tempfile_len = self.generate_templist(prefix=partial_hint) + + if "shortname-directory" in self.shortname_to_event[hint].tags: + self.verbose( + f"Running common prefix check for URL_HINT: {hint} with prefix: {prefix} and partial_hint: {partial_hint}" + ) + + for r in self.execute_ffuf(tempfile, url, prefix=prefix): + self.emit_event( + r["url"], + "URL_UNVERIFIED", + source=self.shortname_to_event[hint], + tags=[f"status-{r['status']}"], + ) + elif "shortname-file" in self.shortname_to_event[hint].tags: + used_extensions = self.build_extension_list(self.shortname_to_event[hint]) + + for ext in used_extensions: + self.verbose( + f"Running common prefix check for URL_HINT: {hint} with prefix: {prefix}, extension: .{ext}, and partial_hint: {partial_hint}" + ) + for r in self.execute_ffuf(tempfile, url, prefix=prefix, suffix=f".{ext}"): + self.emit_event( + r["url"], + "URL_UNVERIFIED", + source=self.shortname_to_event[hint], + tags=[f"status-{r['status']}"], + ) diff --git a/bbot/modules/fingerprintx.py b/bbot/modules/fingerprintx.py new file mode 100644 index 0000000000..e6b76227e0 --- /dev/null +++ b/bbot/modules/fingerprintx.py @@ -0,0 +1,55 @@ +import json +import subprocess +from bbot.modules.base import BaseModule + + +class fingerprintx(BaseModule): + watched_events = ["OPEN_TCP_PORT"] + produced_events = 
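When a shortname hint like `web_co` contains `_` or `-`, `find_delimeter()` above splits on the delimiter and fuzzes only the truncated tail while keeping the known prefix literal in the ffuf URL. A close standalone variant of that split (using `partition`, so everything after the first delimiter is treated as the tail):

~~~python
def find_delimiter(hint, delimiters=("_", "-")):
    """Return (delimiter, prefix, partial_hint) if the hint is splittable."""
    for d in delimiters:
        if d in hint and not hint.startswith(d) and not hint.endswith(d):
            prefix, _, partial = hint.partition(d)
            return d, prefix, partial
    return None

assert find_delimiter("web_co") == ("_", "web", "co")
# the ffuf run then becomes e.g. http://host/web_FUZZ
# with the wordlist filtered down to entries starting with "co"
~~~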
["PROTOCOL"] + flags = ["active", "safe", "service-enum", "slow"] + meta = {"description": "Fingerprint exposed services like RDP, SSH, MySQL, etc."} + options = {"version": "1.1.4"} + options_desc = {"version": "fingerprintx version"} + batch_size = 10 + max_event_handlers = 2 + _priority = 2 + + deps_ansible = [ + { + "name": "Download fingerprintx", + "unarchive": { + "src": "https://github.com/praetorian-inc/fingerprintx/releases/download/v#{BBOT_MODULES_FINGERPRINTX_VERSION}/fingerprintx_#{BBOT_MODULES_FINGERPRINTX_VERSION}_#{BBOT_OS_PLATFORM}_#{BBOT_CPU_ARCH}.tar.gz", + "include": "fingerprintx", + "dest": "#{BBOT_TOOLS}", + "remote_src": True, + }, + }, + ] + + def handle_batch(self, *events): + _input = {e.data: e for e in events} + command = ["fingerprintx", "--json"] + for line in self.helpers.run_live(command, input=list(_input), stderr=subprocess.DEVNULL): + try: + j = json.loads(line) + except Exception as e: + self.debug(f'Error parsing line "{line}" as JSON: {e}') + break + ip = j.get("ip", "") + host = j.get("host", ip) + port = str(j.get("port", "")) + banner = j.get("metadata", {}).get("banner", "").strip() + if port: + port_data = f"{host}:{port}" + protocol = j.get("protocol", "") + tags = set() + if host and ip: + tags.add(f"ip-{ip}") + if host and port and protocol: + source_event = _input.get(port_data) + protocol_data = {"host": host, "protocol": protocol.upper()} + if port: + protocol_data["port"] = port + if banner: + protocol_data["banner"] = banner + self.emit_event(protocol_data, "PROTOCOL", source=source_event, tags=tags) diff --git a/bbot/modules/fullhunt.py b/bbot/modules/fullhunt.py index 8e3e44c956..e0c051c561 100644 --- a/bbot/modules/fullhunt.py +++ b/bbot/modules/fullhunt.py @@ -18,13 +18,13 @@ def setup(self): def ping(self): url = f"{self.base_url}/auth/status" - j = self.helpers.request(url, headers=self.headers).json() + j = self.request_with_fail_count(url, headers=self.headers).json() remaining = j["user_credits"]["remaining_credits"] assert remaining > 0, "No credits remaining" def request_url(self, query): url = f"{self.base_url}/domain/{self.helpers.quote(query)}/subdomains" - return self.helpers.request(url, headers=self.headers) + return self.request_with_fail_count(url, headers=self.headers) def parse_results(self, r, query): return r.json().get("hosts", []) diff --git a/bbot/modules/generic_ssrf.py b/bbot/modules/generic_ssrf.py index ce7c2811b0..db0aa2a3c6 100644 --- a/bbot/modules/generic_ssrf.py +++ b/bbot/modules/generic_ssrf.py @@ -35,7 +35,6 @@ class BaseSubmodule: - technique_description = "base technique description" severity = "INFO" paths = None @@ -80,7 +79,6 @@ def process(self, event, r, subdomain_tag): class Generic_SSRF(BaseSubmodule): - technique_description = "Generic SSRF (GET)" severity = "HIGH" @@ -88,7 +86,6 @@ def set_base_url(self, event): return event.data def create_paths(self): - query_string = "" for param in ssrf_params: query_string += f"{param}=http://SSRF_CANARY&" @@ -101,7 +98,6 @@ def create_paths(self): class Generic_SSRF_POST(BaseSubmodule): - technique_description = "Generic SSRF (POST)" severity = "HIGH" @@ -109,7 +105,6 @@ def set_base_url(self, event): return event.data def test(self, event): - test_url = f"{event.data}" subdomain_tag = self.parent_module.helpers.rand_string(4, digits=False) @@ -131,13 +126,11 @@ def test(self, event): class Generic_XXE(BaseSubmodule): - technique_description = "Generic XXE" severity = "HIGH" paths = 
diff --git a/bbot/modules/generic_ssrf.py b/bbot/modules/generic_ssrf.py
index ce7c2811b0..db0aa2a3c6 100644
--- a/bbot/modules/generic_ssrf.py
+++ b/bbot/modules/generic_ssrf.py
@@ -35,7 +35,6 @@


 class BaseSubmodule:
-
     technique_description = "base technique description"
     severity = "INFO"
     paths = None
@@ -80,7 +79,6 @@ def process(self, event, r, subdomain_tag):


 class Generic_SSRF(BaseSubmodule):
-
     technique_description = "Generic SSRF (GET)"
     severity = "HIGH"

@@ -88,7 +86,6 @@ def set_base_url(self, event):
         return event.data

     def create_paths(self):
-
         query_string = ""
         for param in ssrf_params:
             query_string += f"{param}=http://SSRF_CANARY&"
@@ -101,7 +98,6 @@ def create_paths(self):


 class Generic_SSRF_POST(BaseSubmodule):
-
     technique_description = "Generic SSRF (POST)"
     severity = "HIGH"

@@ -109,7 +105,6 @@ def set_base_url(self, event):
         return event.data

     def test(self, event):
-
         test_url = f"{event.data}"

         subdomain_tag = self.parent_module.helpers.rand_string(4, digits=False)
@@ -131,13 +126,11 @@ def test(self, event):


 class Generic_XXE(BaseSubmodule):
-
     technique_description = "Generic XXE"
     severity = "HIGH"

     paths = None

     def test(self, event):
-
         rand_entity = self.parent_module.helpers.rand_string(4, digits=False)
         subdomain_tag = self.parent_module.helpers.rand_string(4, digits=False)
@@ -156,17 +149,15 @@ def test(self, event):


 class generic_ssrf(BaseModule):
-
     watched_events = ["URL"]
     produced_events = ["VULNERABILITY"]
-    flags = ["active", "aggressive", "web-advanced"]
+    flags = ["active", "aggressive", "web-thorough"]
     meta = {"description": "Check for generic SSRFs"}
     in_scope_only = True

     deps_apt = ["curl"]

     def setup(self):
-
         self.submodules = {}
         self.interactsh_subdomain_tags = {}
         self.severity = None
diff --git a/bbot/modules/gowitness.py b/bbot/modules/gowitness.py
index 8936229969..8759d94074 100644
--- a/bbot/modules/gowitness.py
+++ b/bbot/modules/gowitness.py
@@ -33,6 +33,7 @@ class gowitness(BaseModule):
             "package": {"name": "chromium", "state": "present"},
             "become": True,
             "when": "ansible_facts['os_family'] != 'Debian'",
+            "ignore_errors": True,
         },
         {
             "name": "Install Chromium dependencies (Debian)",
@@ -42,6 +43,7 @@ class gowitness(BaseModule):
             },
             "become": True,
             "when": "ansible_facts['os_family'] == 'Debian'",
+            "ignore_errors": True,
         },
         {
             "name": "Get latest Chromium version (Debian)",
@@ -51,6 +53,7 @@ class gowitness(BaseModule):
             },
             "register": "chromium_version",
             "when": "ansible_facts['os_family'] == 'Debian'",
+            "ignore_errors": True,
         },
         {
             "name": "Download Chromium (Debian)",
@@ -61,11 +64,12 @@ class gowitness(BaseModule):
                 "creates": "#{BBOT_TOOLS}/chrome-linux",
             },
             "when": "ansible_facts['os_family'] == 'Debian'",
+            "ignore_errors": True,
         },
         {
             "name": "Download gowitness",
             "get_url": {
-                "url": "https://github.com/sensepost/gowitness/releases/download/#{BBOT_MODULES_GOWITNESS_VERSION}/gowitness-#{BBOT_MODULES_GOWITNESS_VERSION}-linux-amd64",
+                "url": "https://github.com/sensepost/gowitness/releases/download/#{BBOT_MODULES_GOWITNESS_VERSION}/gowitness-#{BBOT_MODULES_GOWITNESS_VERSION}-#{BBOT_OS_PLATFORM}-#{BBOT_CPU_ARCH}",
                 "dest": "#{BBOT_TOOLS}/gowitness",
                 "mode": "755",
             },
diff --git a/bbot/modules/hackertarget.py b/bbot/modules/hackertarget.py
index 05bf6828cf..38ff695818 100644
--- a/bbot/modules/hackertarget.py
+++ b/bbot/modules/hackertarget.py
@@ -10,7 +10,7 @@ class hackertarget(crobat):
     base_url = "https://api.hackertarget.com"

     def request_url(self, query):
-        return self.helpers.request(f"{self.base_url}/hostsearch/?q={self.helpers.quote(query)}")
+        return self.request_with_fail_count(f"{self.base_url}/hostsearch/?q={self.helpers.quote(query)}")

     def parse_results(self, r, query):
         for line in r.text.splitlines():
diff --git a/bbot/modules/host_header.py b/bbot/modules/host_header.py
index e349b8149a..f6d28fee69 100644
--- a/bbot/modules/host_header.py
+++ b/bbot/modules/host_header.py
@@ -3,10 +3,9 @@


 class host_header(BaseModule):
-
     watched_events = ["HTTP_RESPONSE"]
     produced_events = ["FINDING"]
-    flags = ["active", "aggressive", "web-advanced"]
+    flags = ["active", "aggressive", "web-thorough"]
     meta = {"description": "Try common HTTP Host header spoofing techniques"}

     in_scope_only = True
@@ -14,7 +13,6 @@ class host_header(BaseModule):
     deps_apt = ["curl"]

     def setup(self):
-
         self.interactsh_subdomain_tags = {}
         if self.scan.config.get("interactsh_disable", False) == False:
             try:
@@ -65,15 +63,12 @@ def cleanup(self):
             self.warning(f"Interactsh failure: {e}")

     def handle_event(self, event):
-
         # get any set-cookie responses from the response and add them to the request
         added_cookies = {}

         for k, v in event.data["header-dict"].items():
-
             if k.lower() == "set-cookie":
-
                 cookie_string = v
                 cookie_split = cookie_string.split("=")
                 added_cookies = {cookie_split[0]: cookie_split[1]}
@@ -164,7 +159,6 @@ def handle_event(self, event):

         # emit all the domain reflections we found
         for dr in domain_reflections:
-
             self.emit_event(
                 {
                     "host": str(event.host),
diff --git a/bbot/modules/httpx.py b/bbot/modules/httpx.py
index 9ee5d18eb3..8a941ac6d1 100644
--- a/bbot/modules/httpx.py
+++ b/bbot/modules/httpx.py
@@ -4,10 +4,9 @@


 class httpx(BaseModule):
-
     watched_events = ["OPEN_TCP_PORT", "URL_UNVERIFIED", "URL"]
     produced_events = ["URL", "HTTP_RESPONSE"]
-    flags = ["active", "safe", "web-basic"]
+    flags = ["active", "safe", "web-basic", "web-thorough", "subdomain-enum"]
     meta = {"description": "Visit webpages. Many other modules rely on httpx"}

     batch_size = 500
@@ -21,7 +20,7 @@ class httpx(BaseModule):
         {
             "name": "Download httpx",
             "unarchive": {
-                "src": "https://github.com/projectdiscovery/httpx/releases/download/v#{BBOT_MODULES_HTTPX_VERSION}/httpx_#{BBOT_MODULES_HTTPX_VERSION}_linux_amd64.zip",
+                "src": "https://github.com/projectdiscovery/httpx/releases/download/v#{BBOT_MODULES_HTTPX_VERSION}/httpx_#{BBOT_MODULES_HTTPX_VERSION}_#{BBOT_OS}_#{BBOT_CPU_ARCH}.zip",
                 "include": "httpx",
                 "dest": "#{BBOT_TOOLS}",
                 "remote_src": True,
@@ -30,6 +29,7 @@ class httpx(BaseModule):
     ]

     scope_distance_modifier = 0
+    _priority = 2

     def setup(self):
         self.timeout = self.scan.config.get("httpx_timeout", 5)
@@ -39,7 +39,6 @@ def setup(self):
         return True

     def filter_event(self, event):
-
         if "_wildcard" in str(event.host).split("."):
             return False

@@ -60,7 +59,6 @@ def filter_event(self, event):
         return True

     def handle_batch(self, *events):
-
         stdin = {}
         for e in events:
             url_hash = None
@@ -98,6 +96,8 @@ def handle_batch(self, *events):
             # "-r",
             # self.helpers.resolver_file,
         ]
+        for hk, hv in self.scan.config.get("http_headers", {}).items():
+            command += ["-header", f"{hk}: {hv}"]
         proxy = self.scan.config.get("http_proxy", "")
         if proxy:
             command += ["-http-proxy", proxy]
@@ -127,10 +127,16 @@ def handle_batch(self, *events):

             # main URL
             httpx_ip = j.get("host", "unknown")
-            url_event = self.make_event(url, "URL", source_event, tags=[f"status-{status_code}", f"ip-{httpx_ip}"])
+            tags = [f"status-{status_code}", f"ip-{httpx_ip}"]
+            title = self.helpers.tagify(j.get("title", ""))
+            if title:
+                tags.append(f"http-title-{title}")
+            url_event = self.make_event(url, "URL", source_event, tags=tags)
             if url_event and not "httpx-only" in url_event.tags:
                 if url_event != source_event:
                     self.emit_event(url_event)
+                else:
+                    url_event._resolved.set()

             # HTTP response
             self.emit_event(j, "HTTP_RESPONSE", url_event, internal=True)
diff --git a/bbot/modules/hunt.py b/bbot/modules/hunt.py
index 8790f6477b..88448c8e55 100644
--- a/bbot/modules/hunt.py
+++ b/bbot/modules/hunt.py
@@ -124,13 +124,12 @@ class hunt(BaseModule):

     watched_events = ["HTTP_RESPONSE"]
     produced_events = ["FINDING"]
-    flags = ["active", "safe", "web-advanced"]
+    flags = ["active", "safe", "web-basic", "web-thorough"]
     meta = {"description": "Watch for commonly-exploitable HTTP parameters"}
     # accept all events regardless of scope distance
     scope_distance_modifier = None

     def extract_params(self, body):
-
         # check for input tags
         input_tag = self.input_tag_regex.findall(body)
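The httpx changes above inject global `http_headers` from the scan config as repeated `-header` flags on the command line, alongside the existing proxy handling. Building such a command is straightforward; a minimal sketch (the config values here are illustrative):

~~~python
def build_httpx_command(http_headers, proxy=""):
    command = ["httpx", "-json"]
    for hk, hv in http_headers.items():
        command += ["-header", f"{hk}: {hv}"]  # one flag per configured header
    if proxy:
        command += ["-http-proxy", proxy]
    return command

cmd = build_httpx_command({"User-Agent": "bbot", "X-Scanner": "internal"})
assert cmd == ["httpx", "-json",
               "-header", "User-Agent: bbot",
               "-header", "X-Scanner: internal"]
~~~

Because every module that touches the web goes through httpx, setting headers once in the scan config (for example an authorization cookie) propagates to the whole scan.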
diff --git a/bbot/modules/hunterio.py b/bbot/modules/hunterio.py
index 46aeadd19e..845488844c 100644
--- a/bbot/modules/hunterio.py
+++ b/bbot/modules/hunterio.py
@@ -2,7 +2,6 @@


 class hunterio(shodan_dns):
-
     watched_events = ["DNS_NAME"]
     produced_events = ["EMAIL_ADDRESS", "DNS_NAME", "URL_UNVERIFIED"]
     flags = ["passive", "email-enum", "subdomain-enum", "safe"]
diff --git a/bbot/modules/iis_shortnames.py b/bbot/modules/iis_shortnames.py
index 42b746932f..54d5582498 100644
--- a/bbot/modules/iis_shortnames.py
+++ b/bbot/modules/iis_shortnames.py
@@ -1,130 +1,217 @@
+import re
+from threading import Lock
+
 from bbot.modules.base import BaseModule

+valid_chars = "ETAONRISHDLFCMUGYPWBVKJXQZ0123456789_-$~()&!#%'@^`{}]]"

-class iis_shortnames(BaseModule):

+def encode_all(string):
+    return "".join("%{0:0>2}".format(format(ord(char), "x")) for char in string)
+
+
+class iis_shortnames(BaseModule):
     watched_events = ["URL"]
     produced_events = ["URL_HINT"]
-    flags = ["active", "safe", "web-basic", "iis-shortnames"]
+    flags = ["active", "safe", "web-basic", "web-thorough", "iis-shortnames"]
     meta = {"description": "Check for IIS shortname vulnerability"}
-    options = {"detect_only": True, "threads": 8}
+    options = {"detect_only": True, "max_node_count": 30}
     options_desc = {
         "detect_only": "Only detect the vulnerability and do not run the shortname scanner",
-        "threads": "the number of threads to run concurrently when executing the IIS shortname scanner",
+        "max_node_count": "Limit how many nodes to attempt to resolve on any given recursion branch",
     }

     in_scope_only = True

-    deps_ansible = [
-        {
-            "name": "Install Java JRE (Debian)",
-            "become": True,
-            "package": {"name": "default-jre", "state": "latest"},
-            "when": """ansible_facts['os_family'] == 'Debian'""",
-        },
-        {
-            "name": "Install Java JRE (RedHat)",
-            "become": True,
-            "package": {"name": "java-latest-openjdk", "state": "latest"},
-            "when": """ansible_facts['os_family'] == 'RedHat'""",
-        },
-        {
-            "name": "Install Java JRE (Archlinux)",
-            "package": {"name": "jre-openjdk", "state": "present"},
-            "become": True,
-            "when": """ansible_facts['os_family'] == 'Archlinux'""",
-        },
-    ]
+    max_event_handlers = 8
+
+    def detect(self, target):
+        technique = None
+        detections = []
+        random_string = self.helpers.rand_string(8)
+        control_url = f"{target}{random_string}*~1*/a.aspx"
+        test_url = f"{target}*~1*/a.aspx"
+
+        for method in ["GET", "POST", "OPTIONS", "DEBUG", "HEAD", "TRACE"]:
+            control = self.helpers.request(method=method, url=control_url, allow_redirects=False, retries=2)
+            test = self.helpers.request(method=method, url=test_url, allow_redirects=False, retries=2)
+            if (control != None) and (test != None):
+                if control.status_code != test.status_code:
+                    technique = f"{str(control.status_code)}/{str(test.status_code)} HTTP Code"
+                    detections.append((method, test.status_code, technique))
+
+                elif ("Error Code