ImageGrab Module (macOS and Windows only)

doc

https://pillow.readthedocs.io/en/latest/reference/ImageGrab.html, http://effbot.org/imagingbook/imagegrab.htm

Functions

grab

Note

On Windows, this function currently cannot capture windows that are layered on top of your window (popups, tooltips, menus, and so on). See issue #2569.

This example records the desktop (a so-called “screencast”) into a movie file using PyAV:

#! /bin/env python
import argparse
import json
import signal
import time
from multiprocessing import Process, Queue
try:
    from queue import Empty  # Python 3
except ImportError:
    from Queue import Empty  # Python 2

import av  # https://github.com/mikeboers/PyAV
from PIL import ImageGrab

def _run_capture(args, q):
    """Grab the screen repeatedly and encode the frames into a movie file.

    The loop runs until a truthy item arrives on ``q``.  A SIGINT handler
    installed here puts ``"done"`` on the queue, so pressing Ctrl-C ends
    the recording and lets the encoder be flushed and the file closed.

    Args:
        args: parsed command-line options.  ``args.bbox`` (a JSON list
            string, or empty for the whole screen) and ``args.recordfile``
            (output path) are read.
        q: queue polled once per frame for the stop request.
    """
    def _IntHandler(signum, frame):
        q.put("done")

    signal.signal(signal.SIGINT, _IntHandler)

    bbox = json.loads(args.bbox) if args.bbox else None

    vrate = 24  # frames per second declared on the video stream
    frame_interval = 1.0 / vrate  # true division: ``1.0 // vrate`` is 0.0

    ocont = av.open(args.recordfile, "w")
    vstream = None

    print("start capturing.")
    try:
        while True:
            frame_started = time.time()
            dimg = ImageGrab.grab(bbox=bbox)

            # yuv420p subsamples chroma 2x2 and H.264 encoders such as
            # libx264 reject odd frame sizes, so trim a trailing odd
            # row/column (e.g. a bbox that is 769 pixels high).
            even_size = (dimg.width - dimg.width % 2,
                         dimg.height - dimg.height % 2)
            if even_size != dimg.size:
                dimg = dimg.crop((0, 0) + even_size)

            if vstream is None:
                # The stream is created lazily because its size comes from
                # the first grabbed frame.
                vstream = ocont.add_stream('h264', rate=vrate)
                vstream.width = dimg.width
                vstream.height = dimg.height
                vstream.pix_fmt = 'yuv420p'

            vframe = av.VideoFrame.from_image(dimg)
            for p in vstream.encode(vframe):
                ocont.mux(p)

            # Wait out the rest of this frame's time slot while listening
            # for the stop request.  A blocking get() is required here:
            # with block=False the timeout is ignored and the loop would
            # run unpaced.
            remaining = frame_interval - (time.time() - frame_started)
            try:
                if q.get(timeout=max(0.0, remaining)):
                    break
            except Empty:
                pass

        # Drain the frames still buffered inside the encoder.
        try:
            for p in vstream.encode():
                ocont.mux(p)
        except av.AVError:  # End Of File
            pass

        print("done.")
    finally:
        # Closing finalizes the container; do it even when grabbing or
        # encoding failed so the file handle is not leaked.
        ocont.close()  # MUST!

if __name__ == '__main__':
    # Command-line entry point: parse options, wait for the countdown, then
    # record in a worker process until Ctrl-C is pressed.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--bbox", type=str,
        help="for example, --bbox='[0, 0, 1280, 769]'")
    parser.add_argument("--recordfile", default="recorded.mp4")
    parser.add_argument(
        "--countdown",
        help="countdown for starting, in secs.", type=float, default=0.5)
    args = parser.parse_args()

    time.sleep(args.countdown)

    q = Queue()
    p = Process(target=_run_capture, args=(args, q,))
    p.start()

    # Ctrl-C raises KeyboardInterrupt in this process too.  Instead of dying
    # with a traceback, forward the stop request (harmless if the worker's
    # own handler already queued one) and keep waiting until the worker has
    # finished writing the file.
    while p.is_alive():
        try:
            p.join()
        except KeyboardInterrupt:
            q.put("done")

To stop this script, press Ctrl-C.

Watch on youtube.com
see also

Capturing Desktop/Speaker (ffmpeg examples)

grabclipboard

Note

With an old Pillow (e.g. 2.9.0) on a recent Windows (e.g. Windows 7), this function does not work. Upgrade Pillow!

This example saves the image in the clipboard when you press “ALT + [PRNT SCRN]”. It uses pynput to monitor keyboard input. The example is written for Windows (it relies on keyboard.Key.print_screen), but it should be easy to adapt to your platform.

#
# NOTE:
#   Please stop running other capturing applications like 'DropBox' before
#   running this demonstration.
#
from PIL import Image, ImageGrab
# pynput: https://pypi.python.org/pypi/pynput
#   pynput allows you to control and monitor input devices.
from pynput import keyboard

class KC(object):
    """Keyboard callbacks that save the clipboard image on Alt+PrintScreen.

    ``on_press`` and ``on_release`` are meant to be handed to a
    ``keyboard.Listener``.  Saved files are named ``capturedNNNN.png``
    with a counter that starts at zero for each instance.
    """

    def __init__(self):
        self._count = 0            # index used in the next file name
        self._alt_pressed = False  # True while either Alt key is held

    @staticmethod
    def _is_alt(key):
        """Return True when *key* is the left or the right Alt key."""
        return key == keyboard.Key.alt_l or key == keyboard.Key.alt_r

    def on_press(self, key):
        """Remember that an Alt key went down."""
        if self._alt_pressed:
            return
        if self._is_alt(key):
            self._alt_pressed = True

    def on_release(self, key):
        """Handle a key release; returning False stops the listener."""
        if key == keyboard.Key.print_screen and self._alt_pressed:
            # NOTE: old Pillow can't deal with 'BMP bitfields layout'
            #    of recent Windows. Update Pillow.
            grabbed = ImageGrab.grabclipboard()
            # Skip an empty result and a list result; anything else is
            # saved as an image.
            if not grabbed or isinstance(grabbed, list):
                return None
            grabbed.save("captured%04d.png" % self._count)
            self._count += 1
            return None

        if self._is_alt(key):
            self._alt_pressed = False
            return None

        if key == keyboard.Key.esc:
            # Stop listener
            return False

        return None

if __name__ == '__main__':
    # Feed keyboard events to a KC instance until its on_release callback
    # asks the listener to stop (Esc).
    hotkeys = KC()
    with keyboard.Listener(on_release=hotkeys.on_release,
                           on_press=hotkeys.on_press) as worker:
        worker.join()

Using PyAV, you can encode these images directly into a movie like this:

#! /bin/env python
#
# NOTE:
#   Please stop running other capturing applications like 'DropBox' before
#   running this demonstration.
#
from __future__ import division
import sys
import logging

from PIL import Image
from PIL import ImageGrab
from PIL import ImageOps
# pynput: https://pypi.python.org/pypi/pynput
#   pynput allows you to control and monitor input devices.
from pynput import keyboard
import av  # https://github.com/mikeboers/PyAV


class CaptureTask(object):
    """Encode Alt+PrintScreen clipboard captures into an H.264 movie.

    ``on_press`` and ``on_release`` are meant to be handed to a
    ``keyboard.Listener``.  Every captured image is centred on a
    1280x768 frame and shown for ``args.duration_per_shot`` milliseconds.
    Pressing Esc flushes the encoder, closes the file and stops the
    listener.
    """

    def __init__(self, args):
        """Open the output movie and configure the video stream.

        Args:
            args: parsed command-line options; ``args.recordfile`` (output
                path) and ``args.duration_per_shot`` (milliseconds, int)
                are read.

        Raises:
            ValueError: if ``args.duration_per_shot`` is not positive.
        """
        self._alt_pressed = False
        logging.debug(args)

        # Validate the timing before creating the output file so that bad
        # input does not leave an empty movie behind.
        framerate, self._repeat = self._frame_timing(args.duration_per_shot)
        logging.info("framerate={}, repeat={}".format(
                framerate, self._repeat))

        self._container = av.open(args.recordfile, "w")
        self._vstream = self._container.add_stream(
            'h264', rate=framerate)
        self._vstream.width = 1280
        self._vstream.height = 768
        self._vstream.pix_fmt = 'yuv420p'

    @staticmethod
    def _frame_timing(duration_ms):
        """Return ``(framerate, repeat)`` for one shot of *duration_ms*.

        The frame rate is the lowest whole number of frames per second at
        which *duration_ms* is an exact number of frames; *repeat* is that
        number of frames.
        """
        try:
            from math import gcd  # Python 3.5+
        except ImportError:
            from fractions import gcd  # Python 2 (removed in Python 3.9)

        if duration_ms <= 0:
            raise ValueError(
                "duration per shot must be positive, got %r" % (duration_ms,))
        common = gcd(duration_ms, 1000)
        return 1000 // common, duration_ms // common

    @staticmethod
    def _is_alt(key):
        """Return True when *key* is the left or the right Alt key."""
        return key == keyboard.Key.alt_l or key == keyboard.Key.alt_r

    def _encode_shot(self, img):
        """Centre *img* on a stream-sized frame and encode it."""
        pad_w = self._vstream.width - img.width
        pad_h = self._vstream.height - img.height
        left, top = pad_w // 2, pad_h // 2
        # Give all four borders explicitly: with only (x, y) an odd size
        # difference would leave the frame one pixel short of the stream.
        dimg = ImageOps.expand(
            img, border=(left, top, pad_w - left, pad_h - top))
        vframe = av.VideoFrame.from_image(dimg)
        logging.debug(vframe)
        # Encoding the same frame several times holds the shot on screen
        # for the requested duration.
        for _ in range(self._repeat):
            for p in self._vstream.encode(vframe):
                logging.debug(p)
                self._container.mux(p)

    def _finish(self):
        """Flush the frames buffered in the encoder and close the file."""
        try:
            for p in self._vstream.encode():
                logging.debug(p)
                self._container.mux(p)
        except av.AVError:  # End Of File
            pass
        finally:
            # Close even if flushing failed unexpectedly.
            self._container.close()  # MUST!

    def on_press(self, key):
        """Remember that an Alt key went down."""
        if not self._alt_pressed and self._is_alt(key):
            self._alt_pressed = True

    def on_release(self, key):
        """Handle a key release; returning False stops the listener."""
        if key == keyboard.Key.print_screen and self._alt_pressed:
            # NOTE: old Pillow can't deal with 'BMP bitfields layout'
            #    of recent Windows. Update Pillow.
            img = ImageGrab.grabclipboard()
            # Skip an empty result and a list result.
            if img and not isinstance(img, list):
                self._encode_shot(img)
        elif self._is_alt(key):
            self._alt_pressed = False
        elif key == keyboard.Key.esc:
            self._finish()
            # Stop listener
            return False


if __name__ == '__main__':
    # Command-line entry point: parse options, set up logging, then feed
    # keyboard events to a CaptureTask until the listener stops.
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--recordfile", default="prntscrns.mp4")
    cli.add_argument(
        "--duration-per-shot", type=int, default=1000,
        help="in millisecs")
    cli.add_argument("--verbose", action="store_true")
    args = cli.parse_args()

    if args.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(stream=sys.stderr, level=log_level)

    task = CaptureTask(args)
    with keyboard.Listener(on_release=task.on_release,
                           on_press=task.on_press) as worker:
        worker.join()