############### HEADER PROFILES #########################################
### First we define header profiles.  Header profiles are not intended
### to be used directly, but instead are supposed to be inherited by other
### profiles.

##################
### RESOLUTION ###
##################
## These header profiles define how the scaling of the mask, the foreground
## and the background occurs as a function of the resolution.
## They have been designed for SCANNED documents.
##
## We also set here the resolution-dependent parameters for the segmenter.
## render-size: any character with render-size or fewer pixels will be
##              removed.
##
## edge-size: defines the thickness (in pixels) of character edges, where
##            the color is unreliable.
## blurring-size: size (in pixels) of the blurring convolutions applied in
##            the segmenter.
##
## These parameters have been tuned only in the case of normal-quality
## scanned documents, and they are the default parameters.
##
## For non-scanned documents, or scanned documents requiring fast or
## aggressive compression, we prefer resolution-independent parameters
## for two reasons:
## 1. Segmentation is easier, and parameters are less critical.
## 2. It would represent too much testing and tuning time.
# Resolution header profiles.
# Between 300 and 600 DPI the mask never needs to be upsampled.

### H-600dpi
## image 600 DPI | mask 600 DPI | background 100 DPI | foreground 50 DPI
H-600dpi:
dpi=-600
# image is left at 600 DPI
upsample=1
subsample=1
# mask is left at 600 DPI
mask-upsample=1
# background: 600 / 6 = 100 DPI
bg-subsample=6
# foreground: 600 / 12 = 50 DPI
fg-subsample=12
# main viewing resolution: 100 DPI
target-subsample=6
resolution-multiplier=2
render-size=5
edge-size=3
blurring-size=3
high-variation-foreground=false
thickening=1

### H-500dpi
## image 500 DPI | mask 500 DPI | background 100 DPI | foreground ~42 DPI
H-500dpi:
dpi=-500
# image is left at 500 DPI
upsample=1
subsample=1
# mask is left at 500 DPI
mask-upsample=1
# background: 500 / 5 = 100 DPI
bg-subsample=5
# foreground: 500 / 12 = ~42 DPI
fg-subsample=12
# main viewing resolution: 100 DPI
target-subsample=5
resolution-multiplier=2
# smallest object kept, in pixels of the 500 DPI mask
render-size=5
edge-size=3
blurring-size=3
high-variation-foreground=false
thickening=1

### H-400dpi
## image 400 DPI | mask 400 DPI | background 100 DPI | foreground ~33 DPI
H-400dpi:
dpi=-400
# image is left at 400 DPI
upsample=1
subsample=1
# mask is left at 400 DPI
mask-upsample=1
# background: 400 / 4 = 100 DPI
bg-subsample=4
# foreground: 400 / 12 = ~33 DPI
fg-subsample=12
# main viewing resolution: 100 DPI
target-subsample=4
resolution-multiplier=1
# smallest object kept, in pixels of the 400 DPI mask
render-size=4
edge-size=3
blurring-size=3
high-variation-foreground=false
thickening=1

### H-300dpi
## image 300 DPI | mask 300 DPI | background 100 DPI | foreground 25 DPI
H-300dpi:
dpi=-300
# image is left at 300 DPI
upsample=1
subsample=1
# mask is left at 300 DPI
mask-upsample=1
# background: 300 / 3 = 100 DPI
bg-subsample=3
# foreground: 300 / 12 = 25 DPI
fg-subsample=12
# main viewing resolution: 100 DPI
target-subsample=3
resolution-multiplier=1
# smallest object kept, in pixels of the 300 DPI mask
render-size=3
# edge-size=3 gives a smoother and slightly smaller background on most
# documents, but completely removes the background color in some cases,
# hence the value 2 here.
edge-size=2
blurring-size=3
high-variation-foreground=false
thickening=1

### H-300dpi-up: 300 DPI with an upsampled mask, only for special cases
## image 200 DPI | mask 400 DPI | background 100 DPI | foreground ~33 DPI
H-300dpi-up:
dpi=-300
# image is resized to 300 * 2 / 3 = 200 DPI
upsample=2
subsample=3
# mask: 200 * 2 = 400 DPI
mask-upsample=2
# background: 200 / 2 = 100 DPI
bg-subsample=2
# foreground: 200 / 6 = ~33 DPI; cannot go lower because
# mask-upsample*fg-subsample must stay <= 12
fg-subsample=6
# main viewing resolution: 100 DPI
target-subsample=2
# smallest object kept, in pixels (the earlier note said "300 DPI mask",
# yet the mask above is 400 DPI -- TODO confirm which is meant)
render-size=3
edge-size=3
blurring-size=3
# this profile exists for very high sensitivity to text
high-variation-foreground=true
thickening=1

### H-300dpi-up-full: full upsampling
## NOT USED OR SUPPORTED
## image 300 DPI | mask 600 DPI | background 100 DPI | foreground 50 DPI
H-300dpi-up-full:
dpi=-300
# image is left at 300 DPI
upsample=1
subsample=1
# mask: 300 * 2 = 600 DPI
mask-upsample=2
# background: 300 / 3 = 100 DPI
bg-subsample=3
# foreground: 300 / 6 = 50 DPI; cannot go lower because
# mask-upsample*fg-subsample must stay <= 12
fg-subsample=6
# main viewing resolution: 100 DPI
target-subsample=3
# smallest object kept, in pixels (the earlier note said "300 DPI mask",
# yet the mask above is 600 DPI -- TODO confirm which is meant)
render-size=3
edge-size=3
blurring-size=3
high-variation-foreground=false
thickening=1

### H-200dpi: 200 DPI with no upsampling on the mask
## image 200 DPI | mask 200 DPI | background 100 DPI | foreground 25 DPI
H-200dpi:
dpi=-200
# image is left at 200 DPI
upsample=1
subsample=1
# mask is left at 200 DPI
mask-upsample=1
# background: 200 / 2 = 100 DPI
bg-subsample=2
# foreground: 200 / 8 = 25 DPI
fg-subsample=8
# main viewing resolution: 100 DPI
target-subsample=2
resolution-multiplier=1
# smallest object kept, in pixels of the 200 DPI mask
render-size=2
edge-size=2
blurring-size=2
high-variation-foreground=true
thickening=0

### H-200dpi-up: 200 DPI with an upsampled mask
## image 200 DPI | mask 400 DPI | background 100 DPI | foreground ~33 DPI
H-200dpi-up:
dpi=-200
# image is left at 200 DPI
upsample=1
subsample=1
# background: 200 / 2 = 100 DPI
bg-subsample=2
# main viewing resolution: 100 DPI
target-subsample=2
# mask: 200 * 2 = 400 DPI
mask-upsample=2
# foreground: 200 / 6 = ~33 DPI
fg-subsample=6
# smallest object kept, in pixels of the 400 DPI mask
render-size=4
edge-size=3
blurring-size=3
high-variation-foreground=false
thickening=1

### H-150dpi: 150 DPI with no upsampling on the mask.
### Should only be used for clean scans.
## image 150 DPI | mask 150 DPI | background 150 DPI | foreground 25 DPI
H-150dpi:
dpi=-150
# image is left at 150 DPI
upsample=1
subsample=1
# mask is left at 150 DPI
mask-upsample=1
# background is left at 150 DPI (no subsampling)
bg-subsample=1
# foreground: 150 / 6 = 25 DPI
fg-subsample=6
# main viewing resolution: 150 DPI
target-subsample=1
resolution-multiplier=1
# smallest object kept, in pixels of the 150 DPI mask
render-size=2
edge-size=2
blurring-size=2
high-variation-foreground=true
# JB2 clustering must be more conservative
conservative=true
tolerance-percent=1
thickening=0

### H-150dpi-up: 150 DPI with an upsampled mask
## image 100 DPI | mask 300 DPI | background 100 DPI | foreground 25 DPI
H-150dpi-up:
dpi=-150
# image is resized to 150 * 2 / 3 = 100 DPI
upsample=2
subsample=3
# mask: 100 * 3 = 300 DPI
mask-upsample=3
# background is left at 100 DPI
bg-subsample=1
# foreground: 100 / 4 = 25 DPI
fg-subsample=4
# main viewing resolution: 100 DPI
target-subsample=1
# smallest object kept, in pixels of the 300 DPI mask
render-size=3
edge-size=3
blurring-size=3
high-variation-foreground=false
thickening=1

### H-100dpi: 100 DPI with no upsampling on the mask.
### Should only be used for clean scans.
## image 100 DPI | mask 100 DPI | background 100 DPI | foreground 25 DPI
H-100dpi:
dpi=-100
# image is left at 100 DPI
upsample=1
subsample=1
# mask is left at 100 DPI
mask-upsample=1
# background is left at 100 DPI
bg-subsample=1
# foreground: 100 / 4 = 25 DPI
fg-subsample=4
# main viewing resolution: 100 DPI
target-subsample=1
resolution-multiplier=1
# smallest object kept, in pixels of the 100 DPI mask
render-size=1
edge-size=1
blurring-size=1
high-variation-foreground=true
# JB2 clustering must be more conservative
conservative=true
tolerance-percent=1
thickening=0

### H-100dpi-up: 100 DPI with an upsampled mask
## image 100 DPI | mask 300 DPI | background 100 DPI | foreground 25 DPI
H-100dpi-up:
dpi=-100
# image is left at 100 DPI
upsample=1
subsample=1
# background is left at 100 DPI
bg-subsample=1
# foreground: 100 / 4 = 25 DPI
fg-subsample=4
# main viewing resolution: 100 DPI
target-subsample=1
# mask: 100 * 3 = 300 DPI
mask-upsample=3
# smallest object kept, in pixels of the 300 DPI mask
render-size=3
edge-size=3
blurring-size=3
high-variation-foreground=false
thickening=1

###############
### QUALITY ###
###############

### H-normal-quality: normal quality compression
##
H-normal-quality:
# A high default quality of 75 is chosen; this is appropriate for sources
# equivalent to a quality-95 JPEG.
quality=75
# Color JB2 increases the compression ratio, but does not allow two
# colors to touch each other in the foreground, so it is not used for
# normal compression.
color-jb2=false
# background-floss=true avoids artifacts, but makes files 3% larger on
# average and, on dithered documents, results in an irregular background.
background-floss=true
subsample-refine=false
limit-mem-usage=false

### H-clean-quality: clean compression (assumes the document is not scanned)
H-clean-quality:
# this has not been tuned
quality=75
color-jb2=false
background-floss=false
subsample-refine=false
limit-mem-usage=false
# The minimum object size is always 1 pixel.
render-size=1
edge-size=1
blurring-size=1

### H-aggressive-quality: aggressive compression (lower quality, but smaller)
## (THIS HEADER PROFILE IS NOT SUPPORTED!)
H-aggressive-quality:
# The difference between 70% and 75% quality is barely noticeable visually
# in most documents, yet it makes the output significantly smaller.
quality=70
# Color JB2 increases the compression ratio, but does not allow two
# colors to touch each other in the foreground.
color-jb2=true
background-floss=false
subsample-refine=false
limit-mem-usage=false
edge-size=3

### H-fast-quality: fast compression (lower quality, but faster, and using
### less memory)
## (THIS HEADER PROFILE IS NOT SUPPORTED!)
H-fast-quality:
color-jb2=false
background-floss=false
subsample-refine=false
limit-mem-usage=true
## Lowering blurring-size turns off the time-consuming half-toning and
## texture detection algorithms; however, the filter gets much slower.
# (commented-out H-fast-quality setting)
# blurring-size=2

#######################
### SOURCE DOCUMENT ###
#######################

### H-normal-text: normal size text
# Should be fine for documents with 6 point fonts or larger at 300 DPI.
H-normal-text:
# Consider a pixel whose luminance lies between the background average
# and the foreground average.  Its relative luminance is a number between
# 0 (background) and 100 (foreground).
#
# Such a pixel is classified as foreground only if its relative luminance
# is larger than threshold-level.
threshold-level=75
# Must be conservative here; otherwise characters are lost on 200dpi
# images, or on documents with small low-contrast fonts.
pix-filter-level=25
# This parameter has very little influence on most images, but its effect
# may be dramatic on a few.
inhibit-foreback-level=40
# We are biased towards dark letters on a white field.
inversion-level=25
# The more aggressive "shape-filter-level=50" loses too many dots over 'i',
# and sometimes small characters.
shape-filter-level=40
block-overlap=1

### H-drawn: drawings, drawn text, handwriting
##
H-drawn: H-normal-text
inversion-level=0
pages-per-dict=1
block-overlap=2
# The following option was considered, but is mostly detrimental:
# high-variation-foreground=true

### H-manuscript: documents with no photos, on paper that is showing age
##
H-manuscript: H-drawn
# The ink may have become very pale, so the segmenter must be more
# sensitive.  As there are no photos, ugly artifacts due to
# over-segmentation are less likely.
threshold-level=60
pix-filter-level=20
shape-filter-level=25
background-floss=false

### Maps.
## (THIS PROFILE IS NOT SUPPORTED)
# Letters sit on top of a background with many irregular shapes and lines.
# Maps inherit from "H-normal-text" rather than "H-drawn" because
# they use a lot of printed text, which is often inverted.
H-map: H-normal-text
pix-filter-level=15
shape-filter-level=25
inversion-level=10
# high-variation-foreground=true

############### USER PROFILES ###########################################
### The profiles below are the ones intended for direct use.  Most of
### them are built simply by combining header profiles.

###############
### SCANNED ###
###############

#################################################################
# General profiles for scanned documents.
# Such documents have the following characteristics:
# - text may be inverted
# - JB2 matching helps
# - foreground color is reasonably constant

scan600: H-600dpi H-normal-quality H-normal-text
description="Scanned 600 DPI Documents"
pix-filter-level=20
## Chosen to reduce the file size; must be tested on more images.
shape-filter-level=50

scan500: H-500dpi H-normal-quality H-normal-text
description="Scanned 500 DPI Documents"

scan400: H-400dpi H-normal-quality H-normal-text
description="Scanned 400 DPI Documents"

scan300: H-300dpi H-normal-quality H-normal-text
description="Scanned 300 DPI Documents"

## H-200dpi-up looks better than H-200dpi, at the expense of file size.
scan200: H-200dpi-up H-normal-quality H-normal-text
description="Scanned 200 DPI Documents"

scan150: H-150dpi-up H-normal-quality H-normal-text
description="Scanned 150 DPI Documents"

scan100: H-100dpi-up H-normal-quality H-normal-text
description="Scanned 100 DPI Documents"

#############################################################################
# General profiles for documents which are drawn rather than machine-printed.
# Such documents have the following characteristics:
# - ink is always darker than the background: no inversion
# - JB2 matching is of little help: no multi-page dictionaries
# - foreground color may vary a lot (NOTE(review): high-variation-foreground
#   is nevertheless left commented out in H-drawn -- confirm the intent)
#
# Examples of drawn documents:
# - comics
# - manuscripts
# - blueprints and architectural sketches
# - circuit designs
# - sheet music

#############
### DRAWN ###
#############
### Drawn document profiles, one per scan resolution
## Author: P. Haffner
##
draw600: H-600dpi H-normal-quality H-drawn
description="Drawn Scanned 600DPI Documents"

draw500: H-500dpi H-normal-quality H-drawn
description="Drawn Scanned 500DPI Documents"

draw400: H-400dpi H-normal-quality H-drawn
description="Drawn Scanned 400DPI Documents"

draw300: H-300dpi H-normal-quality H-drawn
description="Drawn Scanned 300DPI Documents"

## H-200dpi-up looks better than H-200dpi, at the expense of file size.
draw200: H-200dpi-up H-normal-quality H-drawn
description="Drawn Scanned 200DPI Documents"

draw150: H-150dpi-up H-normal-quality H-drawn
description="Drawn Scanned 150DPI Documents"

draw100: H-100dpi-up H-normal-quality H-drawn
description="Drawn Scanned 100DPI Documents"

###################
### MANUSCRIPTS ###
###################
### Manuscript document profiles, one per scan resolution (100-600 DPI)
## Author: P. Haffner
##
manuscript600: H-600dpi H-normal-quality H-manuscript
description="Manuscript Scanned 600DPI Documents"

manuscript500: H-500dpi H-normal-quality H-manuscript
description="Manuscript Scanned 500DPI Documents"

manuscript400: H-400dpi H-normal-quality H-manuscript
description="Manuscript Scanned 400DPI Documents"

manuscript300: H-300dpi H-normal-quality H-manuscript
description="Manuscript Scanned 300DPI Documents"

## H-200dpi-up looks better than H-200dpi, at the expense of file size.
manuscript200: H-200dpi-up H-normal-quality H-manuscript
description="Manuscript Scanned 200DPI Documents"

manuscript150: H-150dpi-up H-normal-quality H-manuscript
description="Manuscript Scanned 150DPI Documents"

manuscript100: H-100dpi-up H-normal-quality H-manuscript
description="Manuscript Scanned 100DPI Documents"

###############################################################################
###############################################################################
###### EVERYTHING PAST THIS POINT IS UNSUPPORTED AND SHOULD BE CONSIDERED ######
###### AS EXPERIMENTAL.
######################################################
###############################################################################
###############################################################################
## General profiles for an aggressive treatment of scanned documents ##########
###############################################################################
## THESE PROFILES HAVE BEEN TESTED FOR SMALL SIZE RATHER THAN QUALITY #########
###############################################################################
## The compressed file is generally smaller.
## Author: P. Haffner
##
aggressive600: H-600dpi H-aggressive-quality H-normal-text
# Description aligned with the other aggressive* profiles; it used to be the
# same text as scan600's, which made the two profiles indistinguishable
# by description.
description="Aggressive compression for Scanned 600 DPI Documents"
pix-filter-level=20
## Reduce the file size...
shape-filter-level=50

aggressive500: H-500dpi H-aggressive-quality H-normal-text
description="Aggressive compression for Scanned 500 DPI Documents"

aggressive400: H-400dpi H-aggressive-quality H-normal-text
description="Aggressive compression for Scanned 400 DPI Documents"

aggressive300: H-300dpi H-aggressive-quality H-normal-text
description="Aggressive compression for Scanned 300 DPI Documents"

## H-200dpi-up looks better than H-200dpi, but we want to be aggressive!
aggressive200: H-200dpi H-aggressive-quality H-normal-text
description="Aggressive compression for Scanned 200 DPI Documents"

aggressive150: H-150dpi-up H-aggressive-quality H-normal-text
description="Aggressive compression for Scanned 150 DPI Documents"

aggressive100: H-100dpi-up H-aggressive-quality H-normal-text
description="Aggressive compression for Scanned 100 DPI Documents"

# Builds on aggressive300 with stronger filtering and a coarser background.
very-aggressive300: aggressive300
description="Very Aggressive Compression for Scanned Documents"
shape-filter-level=60
pix-filter-level=50
bg-subsample=4
aggressive=true

fast300: H-300dpi H-fast-quality H-normal-text
description="Fast compression for Scanned 300 DPI Documents"

###############################################################################
###############################################################################
###### RECOVERY PROFILES:                                                   ###
###### Used to improve documents where the scan300 profile misses           ###
###### characters.  In most cases these profiles are based on very few      ###
###### examples, and probably do not match your documents.                  ###
###############################################################################
###### Only 300dpi so far, as a recovery profile should only be designed    ###
###### after the profile for a given resolution has been fully tuned.       ###
###############################################################################

################# RECOVER FINE TEXT ################################
## WARNING: the file will be significantly larger than with scan300.
fine-text300: H-300dpi-up H-normal-quality H-normal-text
description="Scanned 300 DPI Documents With Hard to Segment Text"
threshold-level=50

################# RECOVER INVERTED TEXT ################################
## Use only when the whole document is inverted.
## This profile works better than "H-normal-text" when most of the text
## is inverted (white letters on a darker background).
invert300: scan300
description="Scanned 300 DPI Documents With Inverted Text"
inversion-level=75

################# RECOVER FINE INVERTED TEXT ################################
## WARNING: the file will be significantly larger than with invert300.
fine-invert300: fine-text300
description="Scanned 300 DPI Documents With Hard to Segment Inverted Text"
inversion-level=75

################# MAP PROFILES #####################################
map600: H-600dpi H-normal-quality H-map
description="Scanned 600 DPI Maps"

map500: H-500dpi H-normal-quality H-map
description="Scanned 500 DPI Maps"

map400: H-400dpi H-normal-quality H-map
description="Scanned 400 DPI Maps"

map300: H-300dpi H-normal-quality H-map
description="Scanned 300 DPI Maps"

map200: H-200dpi H-normal-quality H-map
description="Scanned 200 DPI Maps"

map150: H-150dpi-up H-normal-quality H-map
description="Scanned 150 DPI Maps"

# Only this map profile overrides edge-size and blurring-size.
map100: H-100dpi-up H-normal-quality H-map
description="Scanned 100 DPI Maps"
edge-size=1
blurring-size=1

#############
### CLEAN ###
#############

##################################################################
# General profiles for documents which are electronically produced.
# Such documents need the following characteristics:
# - JB2 matching helps
# - text may be inverted
# - foreground color is reasonably constant
# - the background is very constant in regions with text
# - the image has not been significantly reduced in quality by
#   filters, such as being saved as a low-quality JPEG
# PostScript produced directly from word-processing documents,
# screen dumps, and web-page PostScript dumps are good candidates
# for these profiles.
# clean600 inherits H-clean-quality like every other clean* profile
# (it previously inherited H-normal-quality, the header meant for scans).
clean600: H-600dpi H-clean-quality H-normal-text
description="Clean (not scanned) 600 DPI Documents"
pix-filter-level=20
threshold-level=50
inhibit-foreback-level=0
# overrides the value 2 set by H-600dpi
resolution-multiplier=1

clean500: H-500dpi H-clean-quality H-normal-text
description="Clean (not scanned) 500 DPI Documents"
pix-filter-level=20
threshold-level=50
inhibit-foreback-level=0
# overrides the value 2 set by H-500dpi
resolution-multiplier=1

clean400: H-400dpi H-clean-quality H-normal-text
description="Clean (not scanned) 400 DPI Documents"
pix-filter-level=20
threshold-level=50
inhibit-foreback-level=0

clean300: H-300dpi H-clean-quality H-normal-text
description="Clean (not scanned) 300 DPI Documents"
pix-filter-level=20
threshold-level=50
inhibit-foreback-level=0

clean200: H-200dpi H-clean-quality H-normal-text
description="Clean (not scanned) 200 DPI Documents"
pix-filter-level=20
threshold-level=50
inhibit-foreback-level=0

clean150: H-150dpi H-clean-quality H-normal-text
description="Clean (not scanned) 150 DPI Documents"
pix-filter-level=20
threshold-level=50
inhibit-foreback-level=0

clean100: H-100dpi H-clean-quality H-normal-text
description="Clean (not scanned) 100 DPI Documents"
pix-filter-level=20
threshold-level=50
inhibit-foreback-level=0

##############################################################
## BITONAL PROFILES
### Bitonal (it is preferable to use bitonaltodjvu)
##
bitonal:
tobitonal=true

bitonal600: bitonal
dpi=-600
aggressive=true

bitonal500: bitonal
dpi=-500
aggressive=true

bitonal400: bitonal
dpi=-400

bitonal300: bitonal
dpi=-300

bitonal200: bitonal
dpi=-200

bitonal150: bitonal
dpi=-150
conservative=true

bitonal100: bitonal
dpi=-100
lossless=true

## PHOTO PROFILES
photo:
jb2-format=none

photo600: photo
dpi=-600

photo500: photo
dpi=-500

photo400: photo
dpi=-400

photo300: photo
dpi=-300

photo200: photo
dpi=-200

photo150: photo
dpi=-150

photo100: photo
dpi=-100
### Default profile: the one used whenever the --profile= option
## is not specified.
documenttodjvu: scan300