Si vous voulez créer une image d'une page web, il existe plusieurs solutions. La plus simple avec Python est d'utiliser le script webkit2png.py de webkit2png, développé par Paul Hammond :
webkit2png.py http://www.google.com/
Le script webkit2png.py :

#!/usr/bin/python# webkit2png - makes screenshots of web pages# http://www.paulhammond.org/webkit2png__version__ = "0.8-dev"# Copyright (c) 2004-2014 Paul Hammond## Permission is hereby granted, free of charge, to any person obtaining a copy# of this software and associated documentation files (the "Software"), to deal# in the Software without restriction, including without limitation the rights# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell# copies of the Software, and to permit persons to whom the Software is# furnished to do so, subject to the following conditions:## The above copyright notice and this permission notice shall be included in# all copies or substantial portions of the Software.## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN# THE SOFTWARE.#import sysimport optparseimport reimport ostry:import Foundationimport WebKitimport AppKitimport Quartzimport objcexcept ImportError:print "Cannot find pyobjc library files. 
Are you sure it is installed?"sys.exit()class AppDelegate(Foundation.NSObject):# what happens when the app starts updef applicationDidFinishLaunching_(self, aNotification):webview = aNotification.object().windows()[0].contentView()webview.frameLoadDelegate().getURL(webview)self.performSelector_withObject_afterDelay_("timeout:", None, self.timeout)def timeout_(self, obj):Foundation.NSLog("timed out!")AppKit.NSApplication.sharedApplication().terminate_(None)class Webkit2PngScriptBridge(Foundation.NSObject):def init(self):self = super(Webkit2PngScriptBridge, self).init()self.is_stopped = Falseself.start_callback = Falsereturn selfdef stop(self):self.is_stopped = Truedef start(self):self.is_stopped = Falseself.start_callback()def isSelectorExcludedFromWebScript_(self, sel):if sel in ['stop', 'start']:return Falseelse:return Trueclass WebkitLoad (Foundation.NSObject, WebKit.protocols.WebFrameLoadDelegate):# what happens if something goes wrong while loadingdef webView_didFailLoadWithError_forFrame_(self, webview, error, frame):if error.code() == Foundation.NSURLErrorCancelled:returnprint " ... something went wrong: "+error.localizedDescription()self.getURL(webview)def webView_didFailProvisionalLoadWithError_forFrame_(self, webview, error, frame):if error.code() == Foundation.NSURLErrorCancelled:returnprint " ... 
something went wrong: "+error.localizedDescription()self.getURL(webview)def makeFilename(self, URL, options):# make the filenameif options.filename:filename = options.filenameelif options.md5:try:import md5except ImportError:print "--md5 requires python md5 library"AppKit.NSApplication.sharedApplication().terminate_(None)filename = md5.new(URL).hexdigest()else:filename = re.sub('^https?', '', URL)filename = re.sub('\W', '', filename)if options.datestamp:import timenow = time.strftime("%Y%m%d")filename = now + "-" + filenamedir = os.path.abspath(os.path.expanduser(options.dir))if not os.path.exists(options.dir):os.makedirs(dir)return os.path.join(dir, filename)def saveImages(self, bitmapdata, filename, options):# save the fullsize pngif options.fullsize:bitmapdata.representationUsingType_properties_(AppKit.NSPNGFileType, None).writeToFile_atomically_(filename + "-full.png", objc.YES)if options.thumb or options.clipped:# work out how big the thumbnail iswidth = bitmapdata.pixelsWide()height = bitmapdata.pixelsHigh()thumbWidth = (width * options.scale)thumbHeight = (height * options.scale)# make the thumbnails in a scratch imagescratch = AppKit.NSImage.alloc().initWithSize_(Foundation.NSMakeSize(thumbWidth, thumbHeight))scratch.lockFocus()AppKit.NSGraphicsContext.currentContext().setImageInterpolation_(AppKit.NSImageInterpolationHigh)thumbRect = Foundation.NSMakeRect(0.0, 0.0, thumbWidth, thumbHeight)clipRect = Foundation.NSMakeRect(0.0, thumbHeight-options.clipheight, options.clipwidth, options.clipheight)bitmapdata.drawInRect_(thumbRect)thumbOutput = AppKit.NSBitmapImageRep.alloc().initWithFocusedViewRect_(thumbRect)clipOutput = AppKit.NSBitmapImageRep.alloc().initWithFocusedViewRect_(clipRect)scratch.unlockFocus()# save the thumbnails as pngsif options.thumb:thumbOutput.representationUsingType_properties_(AppKit.NSPNGFileType, None).writeToFile_atomically_(filename + "-thumb.png", objc.YES)if 
options.clipped:clipOutput.representationUsingType_properties_(AppKit.NSPNGFileType, None).writeToFile_atomically_(filename + "-clipped.png", objc.YES)def getURL(self, webview):if self.urls:if self.urls[0] == '-':url = sys.stdin.readline().rstrip()if not url:AppKit.NSApplication.sharedApplication().terminate_(None)else:url = self.urls.pop(0)else:AppKit.NSApplication.sharedApplication().terminate_(None)nsurl = Foundation.NSURL.URLWithString_(url)if not (nsurl and nsurl.scheme()):nsurl = Foundation.NSURL.alloc().initFileURLWithPath_(url)nsurl = nsurl.absoluteURL()if self.options.ignore_ssl_check:Foundation.NSURLRequest.setAllowsAnyHTTPSCertificate_forHost_(objc.YES, nsurl.host())print "Fetching", nsurl, "..."self.resetWebview(webview)scriptobject = webview.windowScriptObject()scriptobject.setValue_forKey_(Webkit2PngScriptBridge.alloc().init(), 'webkit2png')webview.mainFrame().loadRequest_(Foundation.NSURLRequest.requestWithURL_(nsurl))if not webview.mainFrame().provisionalDataSource():print " ... not a proper url?"self.getURL(webview)def resetWebview(self, webview):rect = Foundation.NSMakeRect(0, 0, self.options.initWidth, self.options.initHeight)window = webview.window()window.setContentSize_((self.options.initWidth, self.options.initHeight))if self.options.transparent:window.setOpaque_(objc.NO)window.setBackgroundColor_(AppKit.NSColor.clearColor())webview.setDrawsBackground_(objc.NO)webview.setFrame_(rect)def captureView(self, view):bounds = view.bounds()if bounds.size.height > self.options.UNSAFE_max_height:print >> sys.stderr, "Error: page height greater than %s, clipping to avoid crashing windowserver." % self.options.UNSAFE_max_heightbounds.size.height = self.options.UNSAFE_max_heightif bounds.size.width > self.options.UNSAFE_max_width:print >> sys.stderr, "Error: page width greater than %s, clipping to avoid crashing windowserver." 
% self.options.UNSAFE_max_widthbounds.size.width = self.options.UNSAFE_max_widthview.window().display()view.window().setContentSize_(Foundation.NSSize(self.options.initWidth, self.options.initHeight))view.setFrame_(bounds)if hasattr(view, "bitmapImageRepForCachingDisplayInRect_"):bitmapdata = view.bitmapImageRepForCachingDisplayInRect_(bounds)view.cacheDisplayInRect_toBitmapImageRep_(bounds, bitmapdata)else:view.lockFocus()bitmapdata = AppKit.NSBitmapImageRep.alloc()bitmapdata.initWithFocusedViewRect_(bounds)view.unlockFocus()return bitmapdata# what happens when the page has finished loadingdef webView_didFinishLoadForFrame_(self, webview, frame):# don't care about subframesif (frame == webview.mainFrame()):scriptobject = webview.windowScriptObject()if self.options.js:scriptobject.evaluateWebScript_(self.options.js)bridge = scriptobject.valueForKey_('webkit2png')def doGrab():Foundation.NSTimer.scheduledTimerWithTimeInterval_target_selector_userInfo_repeats_(self.options.delay, self, self.doGrab, webview, False)if bridge.is_stopped:bridge.start_callback = doGrabelse:doGrab()def doGrab(self, timer):webview = timer.userInfo()frame = webview.mainFrame()view = frame.frameView().documentView()URL = webview.mainFrame().dataSource().initialRequest().URL().absoluteString()filename = self.makeFilename(URL, self.options)bitmapdata = self.captureView(view)if self.options.selector:doc = frame.DOMDocument()el = doc.querySelector_(self.options.selector)if not el:print " ... no element matching %s found?" 
% self.options.selectorself.getURL(webview)returnleft, top = 0, 0parent = elwhile parent:left += parent.offsetLeft()top += parent.offsetTop()parent = parent.offsetParent()zoom = self.options.zoomcropRect = view.window().convertRectToBacking_(Foundation.NSMakeRect(zoom * left, zoom * top, zoom * el.offsetWidth(), zoom * el.offsetHeight()))cropped = Quartz.CGImageCreateWithImageInRect(bitmapdata.CGImage(), cropRect)bitmapdata = AppKit.NSBitmapImageRep.alloc().initWithCGImage_(cropped)Quartz.CGImageRelease(cropped)self.saveImages(bitmapdata, filename, self.options)print " ... done"self.getURL(webview)def main():# parse the command lineusage = """%prog [options] [http://example.net/ ...]Examples:%prog http://google.com/ # screengrab google%prog -W 1000 -H 1000 http://google.com/ # bigger screengrab of google%prog -T http://google.com/ # just the thumbnail screengrab%prog -TF http://google.com/ # just thumbnail and fullsize grab%prog -o foo http://google.com/ # save images as "foo-thumb.png" etc%prog - # screengrab urls from stdin%prog /path/to/file.html # screengrab local html file%prog -h | less # full documentation"""cmdparser = optparse.OptionParser(usage, version=("webkit2png " + __version__))# TODO: add quiet/verbose optionscmdparser.add_option("--debug", action="store_true",help=optparse.SUPPRESS_HELP)# warning: setting these too high can crash your window servercmdparser.add_option("--UNSAFE-max-height", type="int", default=30000,help=optparse.SUPPRESS_HELP)cmdparser.add_option("--UNSAFE-max-width", type="int", default=30000,help=optparse.SUPPRESS_HELP)group = optparse.OptionGroup(cmdparser, "Network Options")group.add_option("--timeout", type="float", default=60.0,help="page load timeout (default: 60)")group.add_option("--user-agent", type="string", default=False,help="set user agent header")group.add_option("--ignore-ssl-check", action="store_true", default=False,help="ignore SSL Certificate name mismatches")cmdparser.add_option_group(group)group = 
optparse.OptionGroup(cmdparser, "Browser Window Options")group.add_option("-W", "--width", type="float", default=800.0,help="initial (and minimum) width of browser (default: 800)")group.add_option("-H", "--height", type="float", default=600.0,help="initial (and minimum) height of browser (default: 600)")group.add_option("-z", "--zoom", type="float", default=1.0,help='zoom level of browser, equivalent to "Zoom In" and "Zoom Out" in "View" menu (default: 1.0)')group.add_option("--selector", type="string",help="CSS selector for a single element to capture (first matching element will be used)")cmdparser.add_option_group(group)group = optparse.OptionGroup(cmdparser, "Output size options")group.add_option("-F", "--fullsize", action="store_true",help="only create fullsize screenshot")group.add_option("-T", "--thumb", action="store_true",help="only create thumbnail sreenshot")group.add_option("-C", "--clipped", action="store_true",help="only create clipped thumbnail screenshot")group.add_option("--clipwidth", type="float", default=200.0,help="width of clipped thumbnail (default: 200)",metavar="WIDTH")group.add_option("--clipheight", type="float", default=150.0,help="height of clipped thumbnail (default: 150)",metavar="HEIGHT")group.add_option("-s", "--scale", type="float", default=0.25,help="scale factor for thumbnails (default: 0.25)")cmdparser.add_option_group(group)group = optparse.OptionGroup(cmdparser, "Output filename options")group.add_option("-D", "--dir", type="string", default="./",help="directory to place images into")group.add_option("-o", "--filename", type="string", default="",metavar="NAME", help="save images as NAME-full.png,NAME-thumb.png etc")group.add_option("-m", "--md5", action="store_true",help="use md5 hash for filename (like del.icio.us)")group.add_option("-d", "--datestamp", action="store_true",help="include date in filename")cmdparser.add_option_group(group)group = optparse.OptionGroup(cmdparser, "Web page 
functionality")group.add_option("--delay", type="float", default=0,help="delay between page load finishing and screenshot")group.add_option("--js", type="string", default=None,help="JavaScript to execute when the window finishes loading""(example: --js='document.bgColor=\"red\";'). ""If you need to wait for asynchronous code to finish before ""capturing the screenshot, call webkit2png.stop() before the ""async code runs, then webkit2png.start() to capture the image.")group.add_option("--noimages", action="store_true",help=optparse.SUPPRESS_HELP)group.add_option("--no-images", action="store_true",help="don't load images")group.add_option("--nojs", action="store_true",help=optparse.SUPPRESS_HELP)group.add_option("--no-js", action="store_true",help="disable JavaScript support")group.add_option("--transparent", action="store_true",help="render output on a transparent background (requires a web ""page with a transparent background)", default=False)cmdparser.add_option_group(group)(options, args) = cmdparser.parse_args()if len(args) == 0:cmdparser.print_usage()returnif options.filename:if len(args) != 1 or args[0] == "-":print "--filename option requires exactly one url"return# deprecated optionsif options.nojs:print >> sys.stderr, 'Warning: --nojs will be removed in webkit2png 1.0. Please use --no-js.'options.no_js = Trueif options.noimages:print >> sys.stderr, 'Warning: --noimages will be removed in webkit2png 1.0. 
Please use --no-images.'options.no_images = Trueif options.scale == 0:cmdparser.error("scale cannot be zero")# make sure we're outputing somethingif not (options.fullsize or options.thumb or options.clipped):options.fullsize = Trueoptions.thumb = Trueoptions.clipped = True# work out the initial size of the browser window# (this might need to be larger so clipped image is right size)options.initWidth = (options.clipwidth / options.scale)options.initHeight = (options.clipheight / options.scale)options.width *= options.zoomif options.width > options.initWidth:options.initWidth = options.widthif options.height > options.initHeight:options.initHeight = options.height# Hide the dock icon (needs to run before NSApplication.sharedApplication)AppKit.NSBundle.mainBundle().infoDictionary()['LSBackgroundOnly'] = '1'app = AppKit.NSApplication.sharedApplication()# create an app delegatedelegate = AppDelegate.alloc().init()delegate.timeout = options.timeoutAppKit.NSApp().setDelegate_(delegate)# create a windowrect = Foundation.NSMakeRect(0, 0, 100, 100)win = AppKit.NSWindow.alloc()win.initWithContentRect_styleMask_backing_defer_(rect, AppKit.NSBorderlessWindowMask, 2, 0)if options.debug:win.orderFrontRegardless()# create a webview objectwebview = WebKit.WebView.alloc()webview.initWithFrame_(rect)# turn off scrolling so the content is actually x wide and not x-15webview.mainFrame().frameView().setAllowsScrolling_(objc.NO)if options.user_agent:webview.setCustomUserAgent_(options.user_agent)else:webkit_version = Foundation.NSBundle.bundleForClass_(WebKit.WebView).objectForInfoDictionaryKey_(WebKit.kCFBundleVersionKey)[1:]webview.setApplicationNameForUserAgent_("Like-Version/6.0 Safari/%s webkit2png/%s" % (webkit_version, __version__))webview.setPreferencesIdentifier_('webkit2png')webview.preferences().setLoadsImagesAutomatically_(not options.no_images)webview.preferences().setJavaScriptEnabled_(not options.no_js)if options.zoom != 
1.0:webview._setZoomMultiplier_isTextOnly_(options.zoom, False)# add the webview to the windowwin.setContentView_(webview)# create a LoadDelegateloaddelegate = WebkitLoad.alloc().init()loaddelegate.options = optionsloaddelegate.urls = argswebview.setFrameLoadDelegate_(loaddelegate)app.run()if __name__ == '__main__':main()
Recherches associées
| Liens | Site |
|---|---|
| webkit2png | paulhammond |
| How can I take a screenshot/image of a website using Python? | stackoverflow |
| PyWebShot - Generate website thumbnails using Python | coderholic |
| Python: How to Capture WebPage as Image File? | stackoverflow |
| How to save web page as image using python | stackoverflow |
| websnapr 2.0 - website screenshots | websnapr |
| Webpage screenshots with webkit | webscraping |
