# Run the scanner and pipe its NDJSON hits into the Elasticsearch Bulk API.
# The Bulk API requires an action/metadata line before every document, so an
# {"index":{}} line is inserted ahead of each hit; the target index ("logs")
# is taken from the request path.
python log_scanner.py /var/log --format json | \
  awk '{ print "{\"index\":{}}"; print }' | \
  curl -s -H "Content-Type: application/x-ndjson" -XPOST \
    "http://elastic.example.com:9200/logs/_bulk" \
    --data-binary @-
Index mapping suggestion (put in Kibana → Dev Tools):
PUT logs/_mapping
{
  "properties": {
    "timestamp":    { "type": "date" },
    "severity":     { "type": "keyword" },
    "file_path":    { "type": "keyword" },
    "line_no":      { "type": "integer" },
    "matched_line": { "type": "text" },
    "sha256":       { "type": "keyword" }
  }
}
Create a simple Kibana alert:
Rule: Detect “Potential Credential Leak”
Index pattern: logs
Condition: severity == "high"
Action: Email
The search query you've provided, "allintext:username filetype:log password.log facebook install", suggests a specific type of search that could be used for various purposes, including cybersecurity research, penetration testing, or even malicious activity. Let's break down what this query implies and take a closer look at its components and implications.
Apache or Nginx configurations should block direct access to .log files. A properly configured server would return a 403 Forbidden or 404 Not Found. However, many default configurations serve any file inside DocumentRoot.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Log‑Scanner Feature – “allintext username filetype:log passwordlog facebook install”
Detects any line inside a log‑type file that contains ALL of the following
tokens (case‑insensitive):
- username
- passwordlog
- facebook
- install
The scanner is stream‑oriented, memory‑efficient, and works on plain text
or common compressed formats (gz, bz2, zip).
"""
import argparse
import gzip
import bz2
import json
import os
import re
import sys
import csv
import pathlib
import logging
import datetime
import mimetypes
import hashlib
from typing import Iterable, Tuple, List, Dict, Generator
# ----------------------------------------------------------------------
# Configuration (tweak via CLI args or environment variables)
# ----------------------------------------------------------------------
# File suffixes that are considered for scanning.
DEFAULT_EXTS = (".log", ".txt", ".out", ".csv", ".gz", ".bz2", ".zip")
MAX_FILE_SIZE = 100 * 2**20  # 100 MiB – larger files are skipped
MIN_FILE_AGE_DAYS = 0  # 0 disables the age filter
TOKEN_LIST = ["username", "passwordlog", "facebook", "install"]
# One look-ahead per token: a line matches only when every token occurs
# somewhere in it as a whole word, in any order, ignoring case.
TOKEN_REGEX = re.compile(
    "^"
    + "".join(rf"(?=.*\b{re.escape(token)}\b)" for token in TOKEN_LIST)
    + ".*$",
    re.IGNORECASE,
)
# ----------------------------------------------------------------------
# Helper: open file (plain or compressed) as a text stream
# ----------------------------------------------------------------------
def open_text(path: pathlib.Path) -> Iterable[str]:
    """Yield the lines of *path* as text, without their line terminator.

    The file suffix selects the reader: ``.gz`` and ``.bz2`` are decompressed
    on the fly, ``.zip`` archives are read from their first regular member,
    and everything else is opened as plain text.  Content is decoded as UTF-8
    and undecodable bytes are dropped.

    Args:
        path: File to read.

    Yields:
        One ``str`` per line, in file order.

    Raises:
        OSError: If the file cannot be opened or read.
        zipfile.BadZipFile: If a ``.zip`` file is not a valid archive.
    """
    suffix = path.suffix.lower()

    if suffix == ".zip":
        import zipfile  # local import: only needed for zip archives

        # Both the archive and the member handle are closed by the ``with``
        # blocks, even if the consumer abandons the generator early.
        with zipfile.ZipFile(path) as archive:
            # Only the first regular file is scanned (most common case);
            # directory entries (names ending in "/") carry no data.
            members = [n for n in archive.namelist() if not n.endswith("/")]
            if not members:
                return
            with archive.open(members[0], mode="r") as raw:
                for raw_line in raw:
                    # Members are read as bytes, so there is no universal
                    # newline translation: strip "\r" as well as "\n".
                    text = raw_line.decode("utf-8", errors="ignore")
                    yield text.rstrip("\r\n")
        return

    if suffix == ".gz":
        opener = gzip.open
    elif suffix == ".bz2":
        opener = bz2.open
    else:
        opener = open

    with opener(path, mode="rt", encoding="utf-8", errors="ignore") as f:
        for line in f:
            yield line.rstrip("\n")
# ----------------------------------------------------------------------
# Core scanner
# ----------------------------------------------------------------------
def scan_file(
    path: pathlib.Path,
    tokens_regex: re.Pattern = TOKEN_REGEX,
) -> Generator[Dict, None, None]:
    """Yield one hit record for every line of *path* matching *tokens_regex*.

    Args:
        path: File to scan; it is read through ``open_text``.
        tokens_regex: Compiled pattern applied to each line with ``search``.

    Yields:
        A dict with the keys ``file_path``, ``line_no`` (1-based),
        ``matched_line``, ``severity``, ``timestamp`` (UTC time of detection,
        ISO 8601 with a trailing ``Z``) and ``sha256`` (from ``file_hash``).

    Any error raised while reading the file is logged at DEBUG level and
    ends the scan of this file; it is not propagated to the caller.
    """
    digest = None  # computed lazily, once per file, on the first hit
    try:
        for lineno, line in enumerate(open_text(path), start=1):
            if not tokens_regex.search(line):
                continue
            if digest is None:
                digest = file_hash(path)
            # Basic severity heuristic – longer line = more context
            severity = "high" if len(line) > 200 else "medium"
            # Aware "now" converted back to a naive value so the serialized
            # form stays "<iso>Z" rather than "<iso>+00:00".
            now_utc = datetime.datetime.now(datetime.timezone.utc)
            yield {
                "file_path": str(path),
                "line_no": lineno,
                "matched_line": line,
                "severity": severity,
                "timestamp": now_utc.replace(tzinfo=None).isoformat() + "Z",
                "sha256": digest,
            }
    except Exception as exc:
        # Deliberate best-effort boundary: one unreadable or corrupt file
        # must not abort the whole scan.
        logging.debug("Failed to scan %s: %s", path, exc)
def file_hash(path: pathlib.Path) -> str:
    """Return the SHA-256 hex digest of the first 1 MiB of *path*.

    Only a prefix is hashed to keep the cost bounded, so two files that share
    their first 1 MiB produce the same digest.

    Args:
        path: File to fingerprint.

    Returns:
        The hex digest, or the literal string ``"ERROR"`` when the file
        cannot be opened or read.
    """
    h = hashlib.sha256()
    try:
        with open(path, "rb") as f:
            h.update(f.read(1024 * 1024))
    except (OSError, ValueError):
        # OSError: missing/unreadable file; ValueError: invalid path such as
        # one containing an embedded NUL byte.
        return "ERROR"
    return h.hexdigest()
# ----------------------------------------------------------------------
# Discovery / Filtering
# ----------------------------------------------------------------------
def eligible(path: pathlib.Path) -> bool:
    """Decide whether *path* should be scanned.

    A path qualifies when it is a regular file, its suffix is listed in
    ``DEFAULT_EXTS``, it is no larger than ``MAX_FILE_SIZE`` bytes and, if
    ``MIN_FILE_AGE_DAYS`` is non-zero, it was last modified at least that
    many days ago.  Files whose guessed MIME type is not ``text/*`` are
    rejected unless they carry one of the supported archive suffixes.
    Any error while inspecting the file makes it ineligible.
    """
    if not path.is_file():
        return False

    ext = path.suffix.lower()
    if ext not in DEFAULT_EXTS:
        return False

    try:
        too_big = path.stat().st_size > MAX_FILE_SIZE
        if too_big:
            return False
        if MIN_FILE_AGE_DAYS:
            now = datetime.datetime.now()
            modified = datetime.datetime.fromtimestamp(path.stat().st_mtime)
            if (now - modified).days < MIN_FILE_AGE_DAYS:
                return False
    except Exception:
        return False

    # Optional MIME-type sanity check (skip binary blobs), with an exemption
    # for the compressed formats the scanner can read.
    guessed, _ = mimetypes.guess_type(str(path))
    is_archive = any(ext.endswith(tail) for tail in (".gz", ".bz2", ".zip"))
    looks_binary = guessed and not guessed.startswith("text")
    return not (looks_binary and not is_archive)
def discover(root: pathlib.Path) -> Generator[pathlib.Path, None, None]:
    """Walk *root* recursively and yield each file accepted by ``eligible``."""
    for folder, _subdirs, files in os.walk(root):
        base = pathlib.Path(folder)
        yield from filter(eligible, (base / filename for filename in files))
# ----------------------------------------------------------------------
# Output handling
# ----------------------------------------------------------------------
def emit_json(hit: Dict, stream):
    """Write *hit* to *stream* as a single JSON document followed by a newline."""
    stream.write(json.dumps(hit))
    stream.write("\n")
def emit_csv(hit: Dict, writer: csv.DictWriter):
    """Append *hit* to the CSV output as one row, using *writer*'s field order."""
    writer.writerows((hit,))
# ----------------------------------------------------------------------
# CLI
# ----------------------------------------------------------------------
def parse_args():
    """Build the command-line interface and parse the process arguments.

    Returns the ``argparse.Namespace`` with the attributes ``path``,
    ``output``, ``format``, ``max_size``, ``min_age`` and ``debug``.
    """
    cli = argparse.ArgumentParser(
        description=(
            "Log‑Scanner – find lines that contain ALL of the tokens "
            "'username', 'passwordlog', 'facebook', 'install' in any log file."
        ),
    )

    # (flags, keyword settings) for every argument, in declaration order.
    option_specs = (
        (
            ("path",),
            {
                "type": pathlib.Path,
                "help": "Root directory (or single file) to scan.",
            },
        ),
        (
            ("-o", "--output"),
            {
                "type": argparse.FileType('w'),
                "default": sys.stdout,
                "help": "Write results to FILE (default: STDOUT).",
            },
        ),
        (
            ("--format",),
            {
                "choices": ["json", "csv"],
                "default": "json",
                "help": "Result serialization format (default: json).",
            },
        ),
        (
            ("--max-size",),
            {
                "type": int,
                "default": MAX_FILE_SIZE,
                "help": "Maximum file size (bytes) to scan (default: 100 MiB).",
            },
        ),
        (
            ("--min-age",),
            {
                "type": int,
                "default": MIN_FILE_AGE_DAYS,
                "help": "Skip files newer than N days (default: 0 – no filter).",
            },
        ),
        (
            ("--debug",),
            {
                "action": "store_true",
                "help": "Enable debug logging on STDERR.",
            },
        ),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()
def main():
    """Command-line entry point: discover files, scan them, write the hits.

    Parses the CLI arguments, applies ``--max-size`` / ``--min-age`` to the
    module-level filters, then scans either the single file given or every
    eligible file below the given directory.  Each hit is written to the
    selected output as one JSON line or one CSV row.
    """
    global MAX_FILE_SIZE, MIN_FILE_AGE_DAYS

    args = parse_args()
    if args.debug:
        logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)

    # Apply CLI overrides to globals
    MAX_FILE_SIZE = args.max_size
    MIN_FILE_AGE_DAYS = args.min_age

    # Select the emitter once instead of re-testing the format for every hit.
    if args.format == "csv":
        fieldnames = [
            "timestamp", "severity", "file_path", "line_no",
            "matched_line", "sha256",
        ]
        csv_writer = csv.DictWriter(args.output, fieldnames=fieldnames)
        csv_writer.writeheader()

        def emit(hit):
            emit_csv(hit, csv_writer)
    else:
        def emit(hit):
            emit_json(hit, args.output)

    # Walk & scan
    root = args.path
    if root.is_file():
        candidates = [root] if eligible(root) else []
    else:
        candidates = list(discover(root))

    if not candidates:
        # WARNING rather than INFO: without --debug the logging default level
        # hides INFO records, and the tool would exit with no explanation.
        logging.warning("No eligible log files found under %s", root)
        args.output.flush()
        return

    for candidate in candidates:
        for hit in scan_file(candidate):
            emit(hit)
    args.output.flush()


if __name__ == "__main__":
    main()
The search string allintext username filetype log passwordlog facebook install is a perfect storm of poor security practices and powerful search capabilities. It preys on developers who take shortcuts, servers that are misconfigured, and the terrifying efficiency of modern search engines.
If you are a developer, treat this article as a warning: check your public directories right now. If you are a security enthusiast, remember that with great search power comes great responsibility. And if you are a regular user – change your Facebook password, enable 2FA, and hope that the sites you trust have read this article.
The internet never forgets. But neither do Google’s crawlers. And neither will the attackers running this query at this very moment.
The extracted usernames and plaintext passwords are tested against Facebook’s login portal. If successful, the attacker gains access to the victim’s social media account, including private messages, friends lists, and connected apps.
Pick yer 
Yer booty is now 1234 

