2012-10-16 02:02:38 +00:00
/*
* * k2pdfopt . c K2pdfopt optimizes PDF / DJVU files for mobile e - readers
* * ( e . g . the Kindle ) and smartphones . It works well on
* * multi - column PDF / DJVU files . K2pdfopt is freeware .
* *
* * Copyright ( C ) 2012 http : //willus.com
* *
* * This program is free software : you can redistribute it and / or modify
* * it under the terms of the GNU Affero General Public License as
* * published by the Free Software Foundation , either version 3 of the
* * License , or ( at your option ) any later version .
* *
* * This program is distributed in the hope that it will be useful ,
* * but WITHOUT ANY WARRANTY ; without even the implied warranty of
* * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* * GNU Affero General Public License for more details .
* *
* * You should have received a copy of the GNU Affero General Public License
* * along with this program . If not , see < http : //www.gnu.org/licenses/>.
* *
/*
* * WILLUSDEBUGX flags :
* * 1 = Generic
* * 2 = breakinfo row analysis
* * 4 = word wrapping
* * 8 = word wrapping II
* * 16 = hyphens
* * 32 = OCR
* *
*/
// #define WILLUSDEBUGX 32
// #define WILLUSDEBUG
# include <stdio.h>
# include <stdlib.h>
# include <stdarg.h>
# include <string.h>
# include <ctype.h>
# include <math.h>
2012-10-18 17:20:48 +00:00
# include "k2pdfopt.h"
2012-10-16 02:02:38 +00:00
# define HAVE_MUPDF
# define VERSION "v1.51"
# define GRAYLEVEL(r,g,b) ((int)(((r)*0.3+(g)*0.59+(b)*0.11)*1.002))
# if (defined(WIN32) || defined(WIN64))
# define TTEXT_BOLD ANSI_WHITE
# define TTEXT_NORMAL ANSI_NORMAL
# define TTEXT_BOLD2 ANSI_YELLOW
# define TTEXT_INPUT ANSI_GREEN
# define TTEXT_WARN ANSI_RED
# define TTEXT_HEADER ANSI_CYAN
# define TTEXT_MAGENTA ANSI_MAGENTA
# else
# define TTEXT_BOLD "\x1b[0m\x1b[34m"
# define TTEXT_NORMAL "\x1b[0m"
# define TTEXT_BOLD2 "\x1b[0m\x1b[33m"
# define TTEXT_INPUT "\x1b[0m\x1b[32m"
# define TTEXT_WARN "\x1b[0m\x1b[31m"
# define TTEXT_HEADER "\x1b[0m\x1b[36m"
# define TTEXT_MAGENTA "\x1b[0m\x1b[35m"
# endif
# ifndef __ANSI_H__
# define ANSI_RED "\x1b[1m\x1b[31m"
# define ANSI_GREEN "\x1b[1m\x1b[32m"
# define ANSI_YELLOW "\x1b[1m\x1b[33m"
# define ANSI_BROWN "\x1b[0m\x1b[33m"
# define ANSI_BLUE "\x1b[1m\x1b[34m"
# define ANSI_MAGENTA "\x1b[1m\x1b[35m"
# define ANSI_CYAN "\x1b[1m\x1b[36m"
# define ANSI_WHITE "\x1b[1m\x1b[37m"
# define ANSI_NORMAL "\x1b[0m\x1b[37m"
# define ANSI_SAVE_CURSOR "\x1b[s"
# define ANSI_RESTORE_CURSOR "\x1b[u"
# define ANSI_CLEAR_TO_END "\x1b[K"
# define ANSI_BEGIN_LINE "\x1b[80D"
# define ANSI_UP_ONE_LINE "\x1b[1A"
# define ANSI_HOME "\x1b[2J\x1b[0;0;H"
# define __ANSI_H__
# endif
/* bmp.c */
# define WILLUSBITMAP_TYPE_NATIVE 0
# define WILLUSBITMAP_TYPE_WIN32 1
2012-10-27 08:03:49 +00:00
# define BOUND(x,xmin,xmax) if ((x)<(xmin)) (x)=(xmin); else { if ((x)>(xmax)) (x)=(xmax); }
2012-10-16 02:02:38 +00:00
# ifdef PI
# undef PI
# endif
/*
* * Constants from the front of the CRC standard math tables
* * ( Accuracy = 50 digits )
*/
2012-10-20 08:43:34 +00:00
# define PI 3.14159265358979323846264338327950288419716939937511
# define SQRT2 1.41421356237309504880168872420969807856967187537695
# define SQRT3 1.73205080756887729352744634150587236694280525381039
# define LOG10E 0.43429448190325182765112891891660508229439700580367
2012-10-16 02:02:38 +00:00
# define DBPERNEP (20.*LOG10E)
# define SRC_TYPE_PDF 1
# define SRC_TYPE_DJVU 2
# define SRC_TYPE_OTHER 3
/* DATA STRUCTURES */
typedef struct {
int page ; /* Source page */
double rot_deg ; /* Source rotation (happens first) */
double x0 , y0 ; /* x0,y0, in points, of lower left point on rectangle */
double w , h ; /* width and height of rectangle in points */
double scale ; /* Scale rectangle by this factor on destination page */
double x1 , y1 ; /* (x,y) position of lower left point on destination page, in points */
} PDFBOX ;
typedef struct {
PDFBOX * box ;
int n ;
int na ;
} PDFBOXES ;
typedef struct {
int pageno ; /* Source page number */
double page_rot_deg ; /* Source page rotation */
PDFBOXES boxes ;
} PAGEINFO ;
typedef struct {
int ch ; /* Hyphen starting point -- < 0 for no hyphen */
int c2 ; /* End of end region if hyphen is erased */
int r1 ; /* Top of hyphen */
int r2 ; /* Bottom of hyphen */
} HYPHENINFO ;
typedef struct {
int c1 , c2 ; /* Left and right columns */
int r1 , r2 ; /* Top and bottom of region in pixels */
int rowbase ; /* Baseline of row */
int gap ; /* Gap to next region in pixels */
int rowheight ; /* text + gap */
int capheight ;
int h5050 ;
int lcheight ;
HYPHENINFO hyphen ;
} TEXTROW ;
typedef struct {
TEXTROW * textrow ;
int rhmean_pixels ; /* Mean row height (text) */
int centered ; /* Is this set of rows centered? */
int n , na ;
} BREAKINFO ;
typedef struct {
int red [ 256 ] ;
int green [ 256 ] ;
int blue [ 256 ] ;
unsigned char * data ; /* Top to bottom in native type, bottom to */
/* top in Win32 type. */
int width ; /* Width of image in pixels */
int height ; /* Height of image in pixels */
int bpp ; /* Bits per pixel (only 8 or 24 allowed) */
int size_allocated ;
int type ; /* See defines above for WILLUSBITMAP_TYPE_... */
} WILLUSBITMAP ;
typedef struct {
int r1 , r2 ; /* row position from top of bmp, inclusive */
int c1 , c2 ; /* column positions, inclusive */
int rowbase ; /* Baseline of text row */
int capheight ; /* capital letter height */
int h5050 ;
int lcheight ; /* lower-case letter height */
int bgcolor ; /* 0 - 255 */
HYPHENINFO hyphen ;
WILLUSBITMAP * bmp ;
WILLUSBITMAP * bmp8 ;
WILLUSBITMAP * marked ;
} BMPREGION ;
typedef struct {
WILLUSBITMAP bmp ;
int rows ;
int published_pages ;
int bgcolor ;
int fit_to_page ;
int wordcount ;
char debugfolder [ 256 ] ;
} MASTERINFO ;
static int verbose = 0 ;
static int debug = 0 ;
# define DEFAULT_WIDTH 600
# define DEFAULT_HEIGHT 800
# define MIN_REGION_WIDTH_INCHES 1.0
# define SRCROT_AUTO -999.
# define SRCROT_AUTOEP -998.
/*
* * Blank Area Threshold Widths - - average black pixel width , in inches , that
* * prevents a region from being determined as " blank " or clear .
*/
2012-10-27 14:38:53 +00:00
static int src_rot = 0 ;
2012-10-16 02:02:38 +00:00
static double gtc_in = .005 ; // detecting gap between columns
static double gtr_in = .006 ; // detecting gap between rows
static double gtw_in = .0015 ; // detecting gap between words
// static double gtm_in=.005; // detecting margins for trimming
static int src_left_to_right = 1 ;
static int src_whitethresh = - 1 ;
static int dst_dpi = 167 ;
static int fit_columns = 1 ;
static int src_dpi = 300 ;
static int dst_width = DEFAULT_WIDTH ; /* Full device width in pixels */
static int dst_height = DEFAULT_HEIGHT ;
static int dst_userwidth = DEFAULT_WIDTH ;
static int dst_userheight = DEFAULT_HEIGHT ;
static int dst_justify = - 1 ; // 0 = left, 1 = center
static int dst_figure_justify = - 1 ; // -1 = same as dst_justify. 0=left 1=center 2=right
static double dst_min_figure_height_in = 0.75 ;
static int dst_fulljustify = - 1 ; // 0 = no, 1 = yes
static int dst_color = 0 ;
static int dst_landscape = 0 ;
2012-10-27 08:03:49 +00:00
static int src_autostraighten = 0 ;
2012-10-26 09:20:56 +00:00
static double dst_mar = 0.06 ;
2012-10-16 02:02:38 +00:00
static double dst_martop = - 1.0 ;
static double dst_marbot = - 1.0 ;
static double dst_marleft = - 1.0 ;
static double dst_marright = - 1.0 ;
static double min_column_gap_inches = 0.1 ;
static double max_column_gap_inches = 1.5 ; // max gap between columns
static double min_column_height_inches = 1.5 ;
static double mar_top = - 1.0 ;
static double mar_bot = - 1.0 ;
static double mar_left = - 1.0 ;
static double mar_right = - 1.0 ;
static double max_region_width_inches = 3.6 ; /* Max viewable width (device width minus margins) */
static int max_columns = 2 ;
static double column_gap_range = 0.33 ;
static double column_offset_max = 0.2 ;
static double column_row_gap_height_in = 1. / 72. ;
static int text_wrap = 1 ;
static double word_spacing = 0.375 ;
static double display_width_inches = 3.6 ; /* Device width = dst_width / dst_dpi */
static int column_fitted = 0 ;
static double lm_org , bm_org , tm_org , rm_org , dpi_org ;
static double contrast_max = 2.0 ;
static int show_marked_source = 0 ;
static double defect_size_pts = 1.0 ;
static double max_vertical_gap_inches = 0.25 ;
static double vertical_multiplier = 1.0 ;
static double vertical_line_spacing = - 1.2 ;
static double vertical_break_threshold = 1.75 ;
static int erase_vertical_lines = 0 ;
static int k2_hyphen_detect = 1 ;
static int dst_fit_to_page = 0 ;
/*
* * Undocumented cmd - line args
*/
static double no_wrap_ar_limit = 0.2 ; /* -arlim */
static double no_wrap_height_limit_inches = 0.55 ; /* -whmax */
static double little_piece_threshold_inches = 0.5 ; /* -rwmin */
/*
* * Keeping track of vertical gaps
*/
static double last_scale_factor_internal = - 1.0 ;
/* indicates desired vert. gap before next region is added. */
static int last_rowbase_internal ; /* Pixels between last text row baseline and current end */
/* of destination bitmap. */
static int beginning_gap_internal = - 1 ;
static int last_h5050_internal = - 1 ;
static int just_flushed_internal = 0 ;
static int gap_override_internal ; /* If > 0, apply this gap in wrapbmp_flush() and then reset. */
void adjust_params_init ( void ) ;
void set_region_widths ( void ) ;
static void mark_source_page ( BMPREGION * region , int caller_id , int mark_flags ) ;
static void fit_column_to_screen ( double column_width_inches ) ;
static void restore_output_dpi ( void ) ;
void adjust_contrast ( WILLUSBITMAP * src , WILLUSBITMAP * srcgrey , int * white ) ;
static int bmpregion_row_black_count ( BMPREGION * region , int r0 ) ;
static void bmpregion_row_histogram ( BMPREGION * region ) ;
static int bmpregion_find_multicolumn_divider ( BMPREGION * region ,
int * row_black_count , BMPREGION * pageregion , int * npr , int * colcount ,
int * rowcount ) ;
static int bmpregion_column_height_and_gap_test ( BMPREGION * column ,
BMPREGION * region , int r1 , int r2 , int cmid , int * colcount ,
int * rowcount ) ;
static int bmpregion_is_clear ( BMPREGION * region , int * row_is_clear ,
double gt_in ) ;
void bmpregion_multicolumn_add ( BMPREGION * region , MASTERINFO * masterinfo ,
int level , PAGEINFO * pageinfo , int colgap0_pixels ) ;
static void bmpregion_vertically_break ( BMPREGION * region ,
MASTERINFO * masterinfo , int allow_text_wrapping , double force_scale ,
int * colcount , int * rowcount , PAGEINFO * pageinfo , int colgap_pixels ,
int ncols ) ;
static void bmpregion_add ( BMPREGION * region , BREAKINFO * breakinfo ,
MASTERINFO * masterinfo , int allow_text_wrapping , int trim_flags ,
int allow_vertical_breaks , double force_scale , int justify_flags ,
int caller_id , int * colcount , int * rowcount , PAGEINFO * pageinfo ,
int mark_flags , int rowbase_delta ) ;
static void dst_add_gap_src_pixels ( char * caller , MASTERINFO * masterinfo ,
int pixels ) ;
static void dst_add_gap ( MASTERINFO * masterinfo , double inches ) ;
static void bmp_src_to_dst ( MASTERINFO * masterinfo , WILLUSBITMAP * src ,
int justification_flags , int whitethresh , int nocr , int dpi ) ;
static void bmp_fully_justify ( WILLUSBITMAP * jbmp , WILLUSBITMAP * src , int nocr ,
int whitethresh , int just ) ;
# ifdef HAVE_OCR
static void ocrwords_fill_in ( OCRWORDS * words , WILLUSBITMAP * src , int whitethresh , int dpi ) ;
# endif
static void bmpregion_trim_margins ( BMPREGION * region , int * colcount0 ,
int * rowcount0 , int flags ) ;
static void bmpregion_hyphen_detect ( BMPREGION * region ) ;
# if (WILLUSDEBUGX & 6)
static void breakinfo_echo ( BREAKINFO * bi ) ;
# endif
# if (defined(WILLUSDEBUGX) || defined(WILLUSDEBUG))
static void bmpregion_write ( BMPREGION * region , char * filename ) ;
# endif
static int height2_calc ( int * rc , int n ) ;
static void trim_to ( int * count , int * i1 , int i2 , double gaplen ) ;
static void bmpregion_analyze_justification_and_line_spacing ( BMPREGION * region ,
BREAKINFO * breakinfo , MASTERINFO * masterinfo , int * colcount ,
int * rowcount , PAGEINFO * pageinfo , int allow_text_wrapping ,
double force_scale ) ;
static int bmpregion_is_centered ( BMPREGION * region , BREAKINFO * breakinfo ,
int i1 , int i2 , int * textheight ) ;
static double median_val ( double * x , int n ) ;
static void bmpregion_find_vertical_breaks ( BMPREGION * region ,
BREAKINFO * breakinfo , int * colcount , int * rowcount , double apsize_in ) ;
static void textrow_assign_bmpregion ( TEXTROW * textrow , BMPREGION * region ) ;
static void breakinfo_compute_row_gaps ( BREAKINFO * breakinfo , int r2 ) ;
static void breakinfo_compute_col_gaps ( BREAKINFO * breakinfo , int c2 ) ;
static void breakinfo_remove_small_col_gaps ( BREAKINFO * breakinfo , int lcheight ,
double mingap ) ;
static void breakinfo_remove_small_rows ( BREAKINFO * breakinfo , double fracrh ,
double fracgap , BMPREGION * region , int * colcount , int * rowcount ) ;
static void breakinfo_alloc ( int index , BREAKINFO * breakinfo , int nrows ) ;
static void breakinfo_free ( int index , BREAKINFO * breakinfo ) ;
static void breakinfo_sort_by_gap ( BREAKINFO * breakinfo ) ;
static void breakinfo_sort_by_row_position ( BREAKINFO * breakinfo ) ;
static void bmpregion_one_row_find_breaks ( BMPREGION * region ,
BREAKINFO * breakinfo , int * colcount , int * rowcount , int add_to_dbase ) ;
void wrapbmp_init ( void ) ;
static int wrapbmp_ends_in_hyphen ( void ) ;
static void wrapbmp_set_color ( int is_color ) ;
static void wrapbmp_free ( void ) ;
static void wrapbmp_set_maxgap ( int value ) ;
static int wrapbmp_width ( void ) ;
static int wrapbmp_remaining ( void ) ;
static void wrapbmp_add ( BMPREGION * region , int gap , int line_spacing , int rbase ,
int gio , int justification_flags ) ;
static void wrapbmp_flush ( MASTERINFO * masterinfo , int allow_full_justify ,
PAGEINFO * pageinfo , int use_bgi ) ;
static void wrapbmp_hyphen_erase ( void ) ;
static void bmpregion_one_row_wrap_and_add ( BMPREGION * region ,
BREAKINFO * breakinfo , int index , int i0 , int i1 , MASTERINFO * masterinfo ,
int justflags , int * colcount , int * rowcount , PAGEINFO * pageinfo ,
int rheight , int mean_row_gap , int rowbase , int marking_flags , int pi ) ;
static void white_margins ( WILLUSBITMAP * src , WILLUSBITMAP * srcgrey ) ;
static void get_white_margins ( BMPREGION * region ) ;
/* Bitmap orientation detection functions */
static double bitmap_orientation ( WILLUSBITMAP * bmp ) ;
static double bmp_inflections_vertical ( WILLUSBITMAP * srcgrey , int ndivisions ,
int delta , int * wthresh ) ;
static double bmp_inflections_horizontal ( WILLUSBITMAP * srcgrey , int ndivisions ,
int delta , int * wthresh ) ;
static int inflection_count ( double * x , int n , int delta , int * wthresh ) ;
static void pdfboxes_init ( PDFBOXES * boxes ) ;
static void pdfboxes_free ( PDFBOXES * boxes ) ;
/*
static void pdfboxes_add_box ( PDFBOXES * boxes , PDFBOX * box ) ;
static void pdfboxes_delete ( PDFBOXES * boxes , int n ) ;
*/
static void word_gaps_add ( BREAKINFO * breakinfo , int lcheight ,
double * median_gap ) ;
static void bmp_detect_vertical_lines ( WILLUSBITMAP * bmp , WILLUSBITMAP * cbmp ,
double dpi , double minwidth_in , double maxwidth_in , double minheight_in ,
double anglemax_deg , int white_thresh ) ;
static int vert_line_erase ( WILLUSBITMAP * bmp , WILLUSBITMAP * cbmp ,
WILLUSBITMAP * tmp , int row0 , int col0 , double tanthx ,
double minheight_in , double minwidth_in , double maxwidth_in ,
int white_thresh ) ;
static void willus_dmem_alloc_warn ( int index , void * * ptr , int size ,
char * funcname , int exitcode ) ;
static void willus_dmem_free ( int index , double * * ptr , char * funcname ) ;
static int willus_mem_alloc_warn ( void * * ptr , int size , char * name , int exitcode ) ;
static void willus_mem_free ( double * * ptr , char * name ) ;
static void sortd ( double * x , int n ) ;
static void sorti ( int * x , int n ) ;
static void bmp_init ( WILLUSBITMAP * bmap ) ;
static int bmp_alloc ( WILLUSBITMAP * bmap ) ;
static void bmp_free ( WILLUSBITMAP * bmap ) ;
static int bmp_copy ( WILLUSBITMAP * dest , WILLUSBITMAP * src ) ;
static void bmp_fill ( WILLUSBITMAP * bmp , int r , int g , int b ) ;
static int bmp_bytewidth ( WILLUSBITMAP * bmp ) ;
static unsigned char * bmp_rowptr_from_top ( WILLUSBITMAP * bmp , int row ) ;
static void bmp_more_rows ( WILLUSBITMAP * bmp , double ratio , int pixval ) ;
static int bmp_is_grayscale ( WILLUSBITMAP * bmp ) ;
static int bmp_resample ( WILLUSBITMAP * dest , WILLUSBITMAP * src , double x1 ,
double y1 , double x2 , double y2 , int newwidth , int newheight ) ;
static void bmp_contrast_adjust ( WILLUSBITMAP * dest , WILLUSBITMAP * src , double contrast ) ;
static void bmp_convert_to_greyscale_ex ( WILLUSBITMAP * dst , WILLUSBITMAP * src ) ;
2012-10-27 08:03:49 +00:00
static double bmp_autostraighten ( WILLUSBITMAP * src , WILLUSBITMAP * srcgrey , int white ,
double maxdegrees , double mindegrees , int debug ) ;
2012-10-27 14:38:53 +00:00
static int bmp_rotate_right_angle ( WILLUSBITMAP * bmp , int degrees ) ;
2012-10-16 02:02:38 +00:00
static int bmpmupdf_pixmap_to_bmp ( WILLUSBITMAP * bmp , fz_context * ctx ,
fz_pixmap * pixmap ) ;
2012-10-18 17:20:48 +00:00
static void handle ( int wait , ddjvu_context_t * ctx ) ;
2012-10-16 02:02:38 +00:00
static MASTERINFO _masterinfo , * masterinfo ;
static int master_bmp_inited = 0 ;
static int master_bmp_width = 0 ;
static int master_bmp_height = 0 ;
2012-10-20 08:43:34 +00:00
static int max_page_width_pix = 3000 ;
static int max_page_height_pix = 4000 ;
static double shrink_factor = 0.9 ;
2012-10-24 14:01:36 +00:00
static double zoom_value = 1.0 ;
2012-10-26 09:16:31 +00:00
static double gamma_correction = 1.0 ;
2012-10-16 02:02:38 +00:00
2012-10-18 17:20:48 +00:00
static void k2pdfopt_reflow_bmp ( MASTERINFO * masterinfo , WILLUSBITMAP * src ) {
2012-10-16 02:02:38 +00:00
PAGEINFO _pageinfo , * pageinfo ;
WILLUSBITMAP _srcgrey , * srcgrey ;
2012-10-18 17:20:48 +00:00
int i , white , dpi ;
double area_ratio ;
2012-10-16 02:02:38 +00:00
masterinfo - > debugfolder [ 0 ] = ' \0 ' ;
white = src_whitethresh ; /* Will be set by adjust_contrast() or set to src_whitethresh */
dpi = src_dpi ;
adjust_params_init ( ) ;
set_region_widths ( ) ;
srcgrey = & _srcgrey ;
if ( master_bmp_inited = = 0 ) {
bmp_init ( & masterinfo - > bmp ) ;
master_bmp_inited = 1 ;
}
2012-10-18 17:20:48 +00:00
bmp_free ( & masterinfo - > bmp ) ;
2012-10-16 02:02:38 +00:00
bmp_init ( & masterinfo - > bmp ) ;
bmp_init ( srcgrey ) ;
wrapbmp_init ( ) ;
int ii ;
masterinfo - > bmp . bpp = 8 ;
for ( ii = 0 ; ii < 256 ; ii + + )
masterinfo - > bmp . red [ ii ] = masterinfo - > bmp . blue [ ii ] =
masterinfo - > bmp . green [ ii ] = ii ;
masterinfo - > rows = 0 ;
masterinfo - > bmp . width = dst_width ;
area_ratio = 8.5 * 11.0 * dst_dpi * dst_dpi / ( dst_width * dst_height ) ;
masterinfo - > bmp . height = dst_height * area_ratio * 1.5 ;
bmp_alloc ( & masterinfo - > bmp ) ;
bmp_fill ( & masterinfo - > bmp , 255 , 255 , 255 ) ;
2012-10-27 14:38:53 +00:00
if ( src_rot ! = 0 ) {
bmp_rotate_right_angle ( src , src_rot ) ;
}
2012-10-16 02:02:38 +00:00
BMPREGION region ;
bmp_copy ( srcgrey , src ) ;
adjust_contrast ( src , srcgrey , & white ) ;
white_margins ( src , srcgrey ) ;
2012-10-27 08:03:49 +00:00
if ( erase_vertical_lines > 0 )
bmp_detect_vertical_lines ( srcgrey , src , ( double ) src_dpi , 0.005 , 0.25 ,
min_column_height_inches , src_autostraighten , white ) ;
if ( src_autostraighten > 0. ) {
double rot ;
rot = bmp_autostraighten ( src , srcgrey , white , src_autostraighten , 0.1 , debug ) ;
pageinfo - > page_rot_deg + = rot ;
}
2012-10-16 02:02:38 +00:00
region . r1 = 0 ;
region . r2 = srcgrey - > height - 1 ;
region . c1 = 0 ;
region . c2 = srcgrey - > width - 1 ;
region . bgcolor = white ;
region . bmp = src ;
region . bmp8 = srcgrey ;
masterinfo - > bgcolor = white ;
masterinfo - > fit_to_page = dst_fit_to_page ;
/* Check to see if master bitmap might need more room */
2012-10-18 17:20:48 +00:00
bmpregion_multicolumn_add ( & region , masterinfo , 1 , pageinfo , ( int ) ( 0.25 * src_dpi + .5 ) ) ;
2012-10-16 02:02:38 +00:00
master_bmp_width = masterinfo - > bmp . width ;
master_bmp_height = masterinfo - > rows ;
bmp_free ( srcgrey ) ;
2012-10-18 17:20:48 +00:00
}
2012-10-27 06:24:27 +00:00
void k2pdfopt_set_params ( int bb_width , int bb_height , \
double font_size , double page_margin , \
2012-10-26 09:16:31 +00:00
double line_space , double word_space , \
2012-10-27 08:03:49 +00:00
int wrapping , int straighten , int justification , \
2012-10-27 14:38:53 +00:00
int columns , double contrast , int rotation ) {
2012-10-26 09:16:31 +00:00
dst_userwidth = bb_width ; // dst_width is adjusted in adjust_params_init
dst_userheight = bb_height ;
2012-10-27 06:55:50 +00:00
zoom_value = font_size ;
2012-10-26 09:16:31 +00:00
vertical_line_spacing = line_space ;
word_spacing = word_space ;
text_wrap = wrapping ;
2012-10-27 08:03:49 +00:00
src_autostraighten = straighten ;
2012-10-27 06:55:50 +00:00
max_columns = columns ;
2012-10-26 09:16:31 +00:00
gamma_correction = contrast ; // contrast is only used by k2pdfopt_mupdf_reflow
2012-10-27 14:38:53 +00:00
src_rot = rotation ;
2012-10-27 06:55:50 +00:00
2012-10-27 03:06:53 +00:00
// margin
2012-10-26 09:16:31 +00:00
dst_mar = page_margin ;
dst_martop = - 1.0 ;
dst_marbot = - 1.0 ;
dst_marleft = - 1.0 ;
dst_marright = - 1.0 ;
2012-10-27 03:06:53 +00:00
// justification
if ( justification < 0 ) {
dst_justify = - 1 ;
dst_fulljustify = - 1 ;
}
else if ( justification < = 2 ) {
dst_justify = justification ;
dst_fulljustify = 0 ;
}
else {
dst_justify = - 1 ;
dst_fulljustify = 1 ;
}
2012-10-26 09:16:31 +00:00
}
2012-10-27 06:24:27 +00:00
void k2pdfopt_mupdf_reflow ( fz_document * doc , fz_page * page , fz_context * ctx ) {
2012-10-20 08:43:34 +00:00
fz_device * dev ;
fz_pixmap * pix ;
fz_rect bounds , bounds2 ;
fz_matrix ctm ;
fz_bbox bbox ;
2012-10-18 17:20:48 +00:00
WILLUSBITMAP _src , * src ;
2012-10-20 08:43:34 +00:00
2012-10-27 06:24:27 +00:00
double dpp , zoom ;
zoom = zoom_value ;
2012-10-24 14:01:36 +00:00
double dpi = 250 * zoom ;
2012-10-20 08:43:34 +00:00
do {
dpp = dpi / 72. ;
pix = NULL ;
fz_var ( pix ) ;
bounds = fz_bound_page ( doc , page ) ;
ctm = fz_scale ( dpp , dpp ) ;
// ctm=fz_concat(ctm,fz_rotate(rotation));
bounds2 = fz_transform_rect ( ctm , bounds ) ;
bbox = fz_round_rect ( bounds2 ) ;
2012-10-24 15:53:22 +00:00
printf ( " reading page:%d,%d,%d,%d zoom:%.2f dpi:%.0f \n " , bbox . x0 , bbox . y0 , bbox . x1 , bbox . y1 , zoom , dpi ) ;
2012-10-24 14:01:36 +00:00
zoom_value = zoom ;
zoom * = shrink_factor ;
2012-10-24 15:53:22 +00:00
dpi * = shrink_factor ;
2012-10-20 08:43:34 +00:00
} while ( bbox . x1 > max_page_width_pix | bbox . y1 > max_page_height_pix ) ;
// ctm=fz_translate(0,-page->mediabox.y1);
// ctm=fz_concat(ctm,fz_scale(dpp,-dpp));
// ctm=fz_concat(ctm,fz_rotate(page->rotate));
// ctm=fz_concat(ctm,fz_rotate(0));
// bbox=fz_round_rect(fz_transform_rect(ctm,page->mediabox));
// pix=fz_new_pixmap_with_rect(colorspace,bbox);
pix = fz_new_pixmap_with_bbox ( ctx , fz_device_gray , bbox ) ;
fz_clear_pixmap_with_value ( ctx , pix , 0xff ) ;
dev = fz_new_draw_device ( ctx , pix ) ;
# ifdef MUPDF_TRACE
fz_device * tdev ;
fz_try ( ctx ) {
tdev = fz_new_trace_device ( ctx ) ;
fz_run_page ( doc , page , tdev , ctm , NULL ) ;
}
fz_always ( ctx ) {
fz_free_device ( tdev ) ;
}
# endif
fz_run_page ( doc , page , dev , ctm , NULL ) ;
fz_free_device ( dev ) ;
2012-10-26 09:16:31 +00:00
if ( gamma_correction > = 0.0 ) {
fz_gamma_pixmap ( ctx , pix , gamma_correction ) ;
2012-10-20 08:43:34 +00:00
}
2012-10-18 17:20:48 +00:00
src = & _src ;
masterinfo = & _masterinfo ;
bmp_init ( src ) ;
int status = bmpmupdf_pixmap_to_bmp ( src , ctx , pix ) ;
k2pdfopt_reflow_bmp ( masterinfo , src ) ;
bmp_free ( src ) ;
2012-10-20 08:43:34 +00:00
fz_drop_pixmap ( ctx , pix ) ;
2012-10-18 17:20:48 +00:00
}
void k2pdfopt_djvu_reflow ( ddjvu_page_t * page , ddjvu_context_t * ctx , \
2012-10-27 06:24:27 +00:00
ddjvu_render_mode_t mode , ddjvu_format_t * fmt ) {
2012-10-18 17:20:48 +00:00
WILLUSBITMAP _src , * src ;
ddjvu_rect_t prect ;
ddjvu_rect_t rrect ;
int i , iw , ih , idpi , status ;
2012-10-27 06:24:27 +00:00
double zoom = zoom_value ;
2012-10-24 14:01:36 +00:00
double dpi = 250 * zoom ;
2012-10-18 17:20:48 +00:00
while ( ! ddjvu_page_decoding_done ( page ) )
handle ( 1 , ctx ) ;
iw = ddjvu_page_get_width ( page ) ;
ih = ddjvu_page_get_height ( page ) ;
idpi = ddjvu_page_get_resolution ( page ) ;
prect . x = prect . y = 0 ;
2012-10-20 08:43:34 +00:00
do {
prect . w = iw * dpi / idpi ;
prect . h = ih * dpi / idpi ;
printf ( " reading page:%d,%d,%d,%d dpi:%.0f \n " , prect . x , prect . y , prect . w , prect . h , dpi ) ;
2012-10-24 14:01:36 +00:00
zoom_value = zoom ;
zoom * = shrink_factor ;
2012-10-24 15:53:22 +00:00
dpi * = shrink_factor ;
2012-10-20 08:43:34 +00:00
} while ( prect . w > max_page_width_pix | prect . h > max_page_height_pix ) ;
2012-10-18 17:20:48 +00:00
rrect = prect ;
src = & _src ;
masterinfo = & _masterinfo ;
bmp_init ( src ) ;
src - > width = prect . w = iw * dpi / idpi ;
src - > height = prect . h = ih * dpi / idpi ;
src - > bpp = 8 ;
rrect = prect ;
bmp_alloc ( src ) ;
if ( src - > bpp = = 8 ) {
int ii ;
for ( ii = 0 ; ii < 256 ; ii + + )
src - > red [ ii ] = src - > blue [ ii ] = src - > green [ ii ] = ii ;
}
ddjvu_format_set_row_order ( fmt , 1 ) ;
status = ddjvu_page_render ( page , mode , & prect , & rrect , fmt ,
bmp_bytewidth ( src ) , ( char * ) src - > data ) ;
k2pdfopt_reflow_bmp ( masterinfo , src ) ;
2012-10-16 02:02:38 +00:00
bmp_free ( src ) ;
}
2012-10-18 17:20:48 +00:00
void k2pdfopt_rfbmp_size ( int * width , int * height ) {
2012-10-16 02:02:38 +00:00
* width = master_bmp_width ;
* height = master_bmp_height ;
}
2012-10-18 17:20:48 +00:00
void k2pdfopt_rfbmp_ptr ( unsigned char * * bmp_ptr_ptr ) {
2012-10-16 02:02:38 +00:00
* bmp_ptr_ptr = masterinfo - > bmp . data ;
}
2012-10-24 14:01:36 +00:00
void k2pdfopt_rfbmp_zoom ( double * zoom ) {
* zoom = zoom_value ;
}
2012-10-16 02:02:38 +00:00
/* ansi.c */
# define MAXSIZE 8000
static int ansi_on = 1 ;
static char ansi_buffer [ MAXSIZE ] ;
int avprintf ( FILE * f , char * fmt , va_list args )
{
int status ;
{
if ( ! ansi_on ) {
status = vsprintf ( ansi_buffer , fmt , args ) ;
ansi_parse ( f , ansi_buffer ) ;
} else
status = vfprintf ( f , fmt , args ) ;
}
return ( status ) ;
}
int aprintf ( char * fmt , . . . )
{
va_list args ;
int status ;
va_start ( args , fmt ) ;
status = avprintf ( stdout , fmt , args ) ;
va_end ( args ) ;
return ( status ) ;
}
/*
* * Ensure that max_region_width_inches will be > MIN_REGION_WIDTH_INCHES
* *
* * Should only be called once , after all params are set .
* *
*/
void adjust_params_init ( void )
{
if ( dst_landscape ) {
dst_width = dst_userheight ;
dst_height = dst_userwidth ;
} else {
dst_width = dst_userwidth ;
dst_height = dst_userheight ;
}
if ( dst_mar < 0. )
dst_mar = 0.02 ;
if ( dst_martop < 0. )
dst_martop = dst_mar ;
if ( dst_marbot < 0. )
dst_marbot = dst_mar ;
if ( dst_marleft < 0. )
dst_marleft = dst_mar ;
if ( dst_marright < 0. )
dst_marright = dst_mar ;
if ( ( double ) dst_width / dst_dpi - dst_marleft
- dst_marright < MIN_REGION_WIDTH_INCHES ) {
int olddpi ;
olddpi = dst_dpi ;
dst_dpi = ( int ) ( ( double ) dst_width
/ ( MIN_REGION_WIDTH_INCHES + dst_marleft + dst_marright ) ) ;
aprintf (
TTEXT_BOLD2 " Output DPI of %d is too large. Reduced to %d. " TTEXT_NORMAL " \n \n " ,
olddpi , dst_dpi ) ;
}
}
void set_region_widths ( void )
{
max_region_width_inches = display_width_inches = ( double ) dst_width
/ dst_dpi ;
max_region_width_inches - = ( dst_marleft + dst_marright ) ;
/* This is ensured by adjust_dst_dpi() as of v1.17 */
/*
if ( max_region_width_inches < MIN_REGION_WIDTH_INCHES )
max_region_width_inches = MIN_REGION_WIDTH_INCHES ;
*/
}
/*
* * Process full source page bitmap into rectangular regions and add
* * to the destination bitmap . Start by looking for columns .
* *
* * level = recursion level . First call = 1 , then 2 , . . .
* *
*/
void bmpregion_multicolumn_add ( BMPREGION * region , MASTERINFO * masterinfo ,
int level , PAGEINFO * pageinfo , int colgap0_pixels )
{
static char * funcname = " bmpregion_multicolumn_add " ;
int * row_black_count ;
int r2 , rh , r0 , cgr , maxlevel ;
BMPREGION * srcregion , _srcregion ;
BMPREGION * newregion , _newregion ;
BMPREGION * pageregion ;
double minh ;
int ipr , npr , na ;
int * colcount , * rowcount ;
willus_dmem_alloc_warn ( 1 , ( void * * ) & colcount ,
sizeof ( int ) * ( region - > c2 + 1 ) , funcname , 10 ) ;
willus_dmem_alloc_warn ( 2 , ( void * * ) & rowcount ,
sizeof ( int ) * ( region - > r2 + 1 ) , funcname , 10 ) ;
maxlevel = max_columns / 2 ;
if ( debug )
printf ( " @bmpregion_multicolumn_add (%d,%d) - (%d,%d) lev=%d \n " ,
region - > c1 , region - > r1 , region - > c2 , region - > r2 , level ) ;
newregion = & _newregion ;
( * newregion ) = ( * region ) ;
/* Establish colcount, rowcount arrays */
bmpregion_trim_margins ( newregion , colcount , rowcount , 0xf ) ;
( * newregion ) = ( * region ) ;
srcregion = & _srcregion ;
( * srcregion ) = ( * region ) ;
/* How many page regions do we need? */
minh = min_column_height_inches ;
if ( minh < .01 )
minh = .1 ;
na = ( srcregion - > r2 - srcregion - > r1 + 1 ) / src_dpi / minh ;
if ( na < 1 )
na = 1 ;
na + = 16 ;
/* Allocate page regions */
willus_dmem_alloc_warn ( 3 , ( void * * ) & pageregion , sizeof ( BMPREGION ) * na ,
funcname , 10 ) ;
# ifdef COMMENT
mindr = src_dpi * .045 ; /* src->height/250; */
if ( mindr < 1 )
mindr = 1 ;
# endif
// white=250;
// for (i=0;i<src->width;i++)
// colcount[i]=0;
if ( debug )
bmpregion_row_histogram ( region ) ;
/*
* * Store information about which rows are mostly clear for future
* * processing ( saves processing time ) .
*/
willus_dmem_alloc_warn ( 4 , ( void * * ) & row_black_count ,
region - > bmp8 - > height * sizeof ( int ) , funcname , 10 ) ;
for ( cgr = 0 , r0 = 0 ; r0 < region - > bmp8 - > height ; r0 + + ) {
row_black_count [ r0 ] = bmpregion_row_black_count ( region , r0 ) ;
if ( row_black_count [ r0 ] = = 0 )
cgr + + ;
/*
int dr ;
dr = mindr ;
if ( r0 + dr > region - > bmp8 - > height )
dr = region - > bmp8 - > height - r0 ;
if ( ( row_is_clear [ r0 ] = bmpregion_row_mostly_white ( region , r0 , dr ) ) ! = 0 )
cgr + + ;
*/
// printf("row_is_clear[%d]=%d\n",r0,row_is_clear[r0]);
}
if ( verbose )
printf ( " %d clear rows. \n " , cgr ) ;
if ( max_columns = = 1 ) {
pageregion [ 0 ] = ( * srcregion ) ;
/* Set c1 negative to indicate full span */
pageregion [ 0 ] . c1 = - 1 - pageregion [ 0 ] . c1 ;
npr = 1 ;
} else
/* Find all column dividers in source region and store sequentially in pageregion[] array */
for ( npr = 0 , rh = 0 ; srcregion - > r1 < = srcregion - > r2 ; srcregion - > r1 + =
rh ) {
static char * ierr =
TTEXT_WARN " \n \a Internal error--not enough allocated regions. \n "
" Please inform the developer at willus.com. \n \n " TTEXT_NORMAL ;
if ( npr > = na - 3 ) {
aprintf ( " %s " , ierr ) ;
break ;
}
rh = bmpregion_find_multicolumn_divider ( srcregion , row_black_count ,
pageregion , & npr , colcount , rowcount ) ;
if ( verbose )
printf ( " rh=%d/%d \n " , rh , region - > r2 - region - > r1 + 1 ) ;
}
/* Process page regions by column */
if ( debug )
printf ( " Page regions: %d \n " , npr ) ;
r2 = - 1 ;
for ( ipr = 0 ; ipr < npr ; ) {
int r20 , jpr , colnum , colgap_pixels ;
for ( colnum = 1 ; colnum < = 2 ; colnum + + ) {
if ( debug ) {
printf ( " ipr = %d of %d... \n " , ipr , npr ) ;
printf ( " COLUMN %d... \n " , colnum ) ;
}
r20 = r2 ;
for ( jpr = ipr ; jpr < npr ; jpr + = 2 ) {
/* If we get to a page region that spans the entire source, stop */
if ( pageregion [ jpr ] . c1 < 0 )
break ;
/* See if we should suspend this column and start displaying the next one */
if ( jpr > ipr ) {
double cpdiff , cdiv1 , cdiv2 , rowgap1_in , rowgap2_in ;
if ( column_offset_max < 0. )
break ;
/* Did column divider move too much? */
cdiv1 = ( pageregion [ jpr ] . c2 + pageregion [ jpr + 1 ] . c1 ) / 2. ;
cdiv2 = ( pageregion [ jpr - 2 ] . c2 + pageregion [ jpr - 1 ] . c1 )
/ 2. ;
cpdiff = fabs (
( double ) ( cdiv1 - cdiv2 )
/ ( srcregion - > c2 - srcregion - > c1 + 1 ) ) ;
if ( cpdiff > column_offset_max )
break ;
/* Is gap between this column region and next column region too big? */
rowgap1_in = ( double ) ( pageregion [ jpr ] . r1
- pageregion [ jpr - 2 ] . r2 ) / src_dpi ;
rowgap2_in = ( double ) ( pageregion [ jpr + 1 ] . r1
- pageregion [ jpr - 1 ] . r2 ) / src_dpi ;
if ( rowgap1_in > 0.28 & & rowgap2_in > 0.28 )
break ;
}
( * newregion ) = pageregion [
src_left_to_right ?
jpr + colnum - 1 : jpr + ( 2 - colnum ) ] ;
/* Preserve vertical gap between this region and last region */
if ( r20 > = 0 & & newregion - > r1 - r20 > = 0 )
colgap_pixels = newregion - > r1 - r20 ;
else
colgap_pixels = colgap0_pixels ;
if ( level < maxlevel )
bmpregion_multicolumn_add ( newregion , masterinfo , level + 1 ,
pageinfo , colgap_pixels ) ;
else {
bmpregion_vertically_break ( newregion , masterinfo , text_wrap ,
fit_columns ? - 2.0 : - 1.0 , colcount , rowcount ,
pageinfo , colgap_pixels , 2 * level ) ;
}
r20 = newregion - > r2 ;
}
if ( r20 > r2 )
r2 = r20 ;
if ( jpr = = ipr )
break ;
}
if ( jpr < npr & & pageregion [ jpr ] . c1 < 0 ) {
if ( debug )
printf ( " SINGLE COLUMN REGION... \n " ) ;
( * newregion ) = pageregion [ jpr ] ;
newregion - > c1 = - 1 - newregion - > c1 ;
/* dst_add_gap_src_pixels("Col level",masterinfo,newregion->r1-r2); */
colgap_pixels = newregion - > r1 - r2 ;
bmpregion_vertically_break ( newregion , masterinfo , text_wrap ,
( fit_columns & & ( level > 1 ) ) ? - 2.0 : - 1.0 , colcount ,
rowcount , pageinfo , colgap_pixels , level ) ;
r2 = newregion - > r2 ;
jpr + + ;
}
ipr = jpr ;
}
willus_dmem_free ( 4 , ( double * * ) & row_black_count , funcname ) ;
willus_dmem_free ( 3 , ( double * * ) & pageregion , funcname ) ;
willus_dmem_free ( 2 , ( double * * ) & rowcount , funcname ) ;
willus_dmem_free ( 1 , ( double * * ) & colcount , funcname ) ;
}
static void fit_column_to_screen ( double column_width_inches )
{
double text_width_pixels , lm_pixels , rm_pixels , tm_pixels , bm_pixels ;
if ( ! column_fitted ) {
dpi_org = dst_dpi ;
lm_org = dst_marleft ;
rm_org = dst_marright ;
tm_org = dst_martop ;
bm_org = dst_marbot ;
}
text_width_pixels = max_region_width_inches * dst_dpi ;
lm_pixels = dst_marleft * dst_dpi ;
rm_pixels = dst_marright * dst_dpi ;
tm_pixels = dst_martop * dst_dpi ;
bm_pixels = dst_marbot * dst_dpi ;
dst_dpi = text_width_pixels / column_width_inches ;
dst_marleft = lm_pixels / dst_dpi ;
dst_marright = rm_pixels / dst_dpi ;
dst_martop = tm_pixels / dst_dpi ;
dst_marbot = bm_pixels / dst_dpi ;
set_region_widths ( ) ;
column_fitted = 1 ;
}
static void restore_output_dpi ( void )
{
if ( column_fitted ) {
dst_dpi = dpi_org ;
dst_marleft = lm_org ;
dst_marright = rm_org ;
dst_martop = tm_org ;
dst_marbot = bm_org ;
set_region_widths ( ) ;
}
column_fitted = 0 ;
}
void adjust_contrast ( WILLUSBITMAP * src , WILLUSBITMAP * srcgrey , int * white )
{
int i , j , tries , wc , tc , hist [ 256 ] ;
double contrast , rat0 ;
WILLUSBITMAP * dst , _dst ;
if ( debug & & verbose )
printf ( " \n At adjust_contrast. \n " ) ;
if ( ( * white ) < = 0 )
( * white ) = 192 ;
/* If contrast_max negative, use it as fixed contrast adjustment. */
if ( contrast_max < 0. ) {
bmp_contrast_adjust ( srcgrey , srcgrey , - contrast_max ) ;
if ( dst_color & & fabs ( contrast_max + 1.0 ) > 1e-4 )
bmp_contrast_adjust ( src , src , - contrast_max ) ;
return ;
}
dst = & _dst ;
bmp_init ( dst ) ;
wc = 0 ; /* Avoid compiler warning */
tc = srcgrey - > width * srcgrey - > height ;
rat0 = 0.5 ; /* Avoid compiler warning */
for ( contrast = 1.0 , tries = 0 ; contrast < contrast_max + .01 ; tries + + ) {
if ( fabs ( contrast - 1.0 ) > 1e-4 )
bmp_contrast_adjust ( dst , srcgrey , contrast ) ;
else
bmp_copy ( dst , srcgrey ) ;
/*Get bitmap histogram */
for ( i = 0 ; i < 256 ; i + + )
hist [ i ] = 0 ;
for ( j = 0 ; j < dst - > height ; j + + ) {
unsigned char * p ;
p = bmp_rowptr_from_top ( dst , j ) ;
for ( i = 0 ; i < dst - > width ; i + + , p + + )
hist [ p [ 0 ] ] + + ;
}
if ( tries = = 0 ) {
int h1 ;
for ( h1 = 0 , j = ( * white ) ; j < 256 ; j + + )
h1 + = hist [ j ] ;
rat0 = ( double ) h1 / tc ;
if ( debug & & verbose )
printf ( " rat0 = rat[%d-255]=%.4f \n " , ( * white ) , rat0 ) ;
}
/* Find white ratio */
/*
for ( wc = hist [ 254 ] , j = 253 ; j > = 252 ; j - - )
if ( hist [ j ] > wc1 )
wc1 = hist [ j ] ;
*/
for ( wc = 0 , j = 252 ; j < = 255 ; j + + )
wc + = hist [ j ] ;
/*
if ( ( double ) wc / tc > = rat0 * 0.7 & & ( double ) hist [ 255 ] / wc > 0.995 )
break ;
*/
if ( debug & & verbose )
printf ( " %2d. Contrast=%7.2f, rat[252-255]/rat0=%.4f \n " ,
tries + 1 , contrast , ( double ) wc / tc / rat0 ) ;
if ( ( double ) wc / tc > = rat0 * 0.94 )
break ;
contrast * = 1.05 ;
}
if ( debug )
printf ( " Contrast=%7.2f, rat[252-255]/rat0=%.4f \n " , contrast ,
( double ) wc / tc / rat0 ) ;
/*
bmp_write ( dst , " outc.png " , stdout , 100 ) ;
wfile_written_info ( " outc.png " , stdout ) ;
exit ( 10 ) ;
*/
bmp_copy ( srcgrey , dst ) ;
/* Maybe don't adjust the contrast for the color bitmap? */
if ( dst_color & & fabs ( contrast - 1.0 ) > 1e-4 )
bmp_contrast_adjust ( src , src , contrast ) ;
bmp_free ( dst ) ;
}
static int bmpregion_row_black_count ( BMPREGION * region , int r0 )
{
unsigned char * p ;
int i , nc , c ;
p = bmp_rowptr_from_top ( region - > bmp8 , r0 ) + region - > c1 ;
nc = region - > c2 - region - > c1 + 1 ;
for ( c = i = 0 ; i < nc ; i + + , p + + )
if ( p [ 0 ] < region - > bgcolor )
c + + ;
return ( c ) ;
}
/*
* * Returns height of region found and divider position in ( * divider_column ) .
* * ( * divider_column ) is absolute position on source bitmap .
* *
*/
static int bmpregion_find_multicolumn_divider ( BMPREGION * region ,
int * row_black_count , BMPREGION * pageregion , int * npr , int * colcount ,
int * rowcount )
{
int itop , i , dm , middle , divider_column , min_height_pixels , mhp2 ,
min_col_gap_pixels ;
BMPREGION _newregion , * newregion , column [ 2 ] ;
BREAKINFO * breakinfo , _breakinfo ;
int * rowmin , * rowmax ;
static char * funcname = " bmpregion_find_multicolumn_divider " ;
if ( debug )
printf ( " @bmpregion_find_multicolumn_divider(%d,%d)-(%d,%d) \n " ,
region - > c1 , region - > r1 , region - > c2 , region - > r2 ) ;
breakinfo = & _breakinfo ;
breakinfo - > textrow = NULL ;
breakinfo_alloc ( 101 , breakinfo , region - > r2 - region - > r1 + 1 ) ;
bmpregion_find_vertical_breaks ( region , breakinfo , colcount , rowcount ,
column_row_gap_height_in ) ;
/*
{
printf ( " region (%d,%d)-(%d,%d) has %d breaks: \n " , region - > c1 , region - > r1 , region - > c2 , region - > r2 , breakinfo - > n ) ;
for ( i = 0 ; i < breakinfo - > n ; i + + )
printf ( " Rows %d - %d \n " , breakinfo - > textrow [ i ] . r1 , breakinfo - > textrow [ i ] . r2 ) ;
}
*/
newregion = & _newregion ;
( * newregion ) = ( * region ) ;
min_height_pixels = min_column_height_inches * src_dpi ; /* src->height/15; */
mhp2 = min_height_pixels - 1 ;
if ( mhp2 < 0 )
mhp2 = 0 ;
dm = 1 + ( region - > c2 - region - > c1 + 1 ) * column_gap_range / 2. ;
middle = ( region - > c2 - region - > c1 + 1 ) / 2 ;
min_col_gap_pixels = ( int ) ( min_column_gap_inches * src_dpi + .5 ) ;
if ( verbose ) {
printf ( " (dm=%d, width=%d, min_gap=%d) \n " , dm ,
region - > c2 - region - > c1 + 1 , min_col_gap_pixels ) ;
printf ( " Checking regions (r1=%d, r2=%d, minrh=%d).. " , region - > r1 ,
region - > r2 , min_height_pixels ) ;
fflush ( stdout ) ;
}
breakinfo_sort_by_row_position ( breakinfo ) ;
willus_dmem_alloc_warn ( 5 , ( void * * ) & rowmin ,
( region - > c2 + 10 ) * 2 * sizeof ( int ) , funcname , 10 ) ;
rowmax = & rowmin [ region - > c2 + 10 ] ;
for ( i = 0 ; i < region - > c2 + 2 ; i + + ) {
rowmin [ i ] = region - > r2 + 2 ;
rowmax [ i ] = - 1 ;
}
/* Start with top-most and bottom-most regions, look for column dividers */
for ( itop = 0 ;
itop < breakinfo - > n
& & breakinfo - > textrow [ itop ] . r1
< region - > r2 + 1 - min_height_pixels ; itop + + ) {
int ibottom ;
for ( ibottom = breakinfo - > n - 1 ;
ibottom > = itop
& & breakinfo - > textrow [ ibottom ] . r2
- breakinfo - > textrow [ itop ] . r1
> = min_height_pixels ; ibottom - - ) {
/*
* * Look for vertical shaft of clear space that clearly demarcates
* * two columns
*/
for ( i = 0 ; i < dm ; i + + ) {
int foundgap , ii , c1 , c2 , iiopt , status ;
newregion - > c1 = region - > c1 + middle - i ;
/* If we've effectively already checked this shaft, move on */
if ( itop > = rowmin [ newregion - > c1 ]
& & ibottom < = rowmax [ newregion - > c1 ] )
continue ;
newregion - > c2 = newregion - > c1 + min_col_gap_pixels - 1 ;
newregion - > r1 = breakinfo - > textrow [ itop ] . r1 ;
newregion - > r2 = breakinfo - > textrow [ ibottom ] . r2 ;
foundgap = bmpregion_is_clear ( newregion , row_black_count ,
gtc_in ) ;
if ( ! foundgap & & i > 0 ) {
newregion - > c1 = region - > c1 + middle + i ;
newregion - > c2 = newregion - > c1 + min_col_gap_pixels - 1 ;
foundgap = bmpregion_is_clear ( newregion , row_black_count ,
gtc_in ) ;
}
if ( ! foundgap )
continue ;
/* Found a gap, but look for a better gap nearby */
c1 = newregion - > c1 ;
c2 = newregion - > c2 ;
for ( iiopt = 0 , ii = - min_col_gap_pixels ;
ii < = min_col_gap_pixels ; ii + + ) {
int newgap ;
newregion - > c1 = c1 + ii ;
newregion - > c2 = c2 + ii ;
newgap = bmpregion_is_clear ( newregion , row_black_count ,
gtc_in ) ;
if ( newgap > 0 & & newgap < foundgap ) {
iiopt = ii ;
foundgap = newgap ;
if ( newgap = = 1 )
break ;
}
}
newregion - > c1 = c1 + iiopt ;
/* If we've effectively already checked this shaft, move on */
if ( itop > = rowmin [ newregion - > c1 ]
& & ibottom < = rowmax [ newregion - > c1 ] )
continue ;
newregion - > c2 = c2 + iiopt ;
divider_column = newregion - > c1 + min_col_gap_pixels / 2 ;
status = bmpregion_column_height_and_gap_test ( column , region ,
breakinfo - > textrow [ itop ] . r1 ,
breakinfo - > textrow [ ibottom ] . r2 , divider_column ,
colcount , rowcount ) ;
/* If fails column height or gap test, mark as bad */
if ( status ) {
if ( itop < rowmin [ newregion - > c1 ] )
rowmin [ newregion - > c1 ] = itop ;
if ( ibottom > rowmax [ newregion - > c1 ] )
rowmax [ newregion - > c1 ] = ibottom ;
}
/* If right column too short, stop looking */
if ( status & 2 )
break ;
if ( ! status ) {
int colheight ;
/* printf(" GOT COLUMN DIVIDER AT x=%d.\n",(*divider_column)); */
if ( verbose ) {
printf ( " \n GOOD REGION: col gap=(%d,%d) - (%d,%d) \n "
" r1=%d, r2=%d \n " ,
newregion - > c1 , newregion - > r1 , newregion - > c2 ,
newregion - > r2 , breakinfo - > textrow [ itop ] . r1 ,
breakinfo - > textrow [ ibottom ] . r2 ) ;
}
if ( itop > 0 ) {
/* add 1-column region */
pageregion [ ( * npr ) ] = ( * region ) ;
pageregion [ ( * npr ) ] . r2 = breakinfo - > textrow [ itop - 1 ] . r2 ;
if ( pageregion [ ( * npr ) ] . r2
> pageregion [ ( * npr ) ] . bmp8 - > height - 1 )
pageregion [ ( * npr ) ] . r2 =
pageregion [ ( * npr ) ] . bmp8 - > height - 1 ;
bmpregion_trim_margins ( & pageregion [ ( * npr ) ] , colcount ,
rowcount , 0xf ) ;
/* Special flag to indicate full-width region */
pageregion [ ( * npr ) ] . c1 = - 1 - pageregion [ ( * npr ) ] . c1 ;
( * npr ) = ( * npr ) + 1 ;
}
pageregion [ ( * npr ) ] = column [ 0 ] ;
( * npr ) = ( * npr ) + 1 ;
pageregion [ ( * npr ) ] = column [ 1 ] ;
( * npr ) = ( * npr ) + 1 ;
colheight = breakinfo - > textrow [ ibottom ] . r2 - region - > r1 + 1 ;
breakinfo_free ( 101 , breakinfo ) ;
/*
printf ( " Returning %d divider column = %d - %d \n " , region - > r2 - region - > r1 + 1 , newregion - > c1 , newregion - > c2 ) ;
*/
return ( colheight ) ;
}
}
}
}
if ( verbose )
printf ( " NO GOOD REGION FOUND. \n " ) ;
pageregion [ ( * npr ) ] = ( * region ) ;
bmpregion_trim_margins ( & pageregion [ ( * npr ) ] , colcount , rowcount , 0xf ) ;
/* Special flag to indicate full-width region */
pageregion [ ( * npr ) ] . c1 = - 1 - pageregion [ ( * npr ) ] . c1 ;
( * npr ) = ( * npr ) + 1 ;
/* (*divider_column)=region->c2+1; */
willus_dmem_free ( 5 , ( double * * ) & rowmin , funcname ) ;
breakinfo_free ( 101 , breakinfo ) ;
/*
printf ( " Returning %d \n " , region - > r2 - region - > r1 + 1 ) ;
*/
return ( region - > r2 - region - > r1 + 1 ) ;
}
/*
* * 1 = column 1 too short
* * 2 = column 2 too short
* * 3 = both too short
* * 0 = both okay
* * Both columns must pass height requirement .
* *
* * Also , if gap between columns > max_column_gap_inches , fails test . ( 8 - 31 - 12 )
* *
*/
static int bmpregion_column_height_and_gap_test ( BMPREGION * column ,
BMPREGION * region , int r1 , int r2 , int cmid , int * colcount ,
int * rowcount )
{
int min_height_pixels , status ;
status = 0 ;
min_height_pixels = min_column_height_inches * src_dpi ;
column [ 0 ] = ( * region ) ;
column [ 0 ] . r1 = r1 ;
column [ 0 ] . r2 = r2 ;
column [ 0 ] . c2 = cmid - 1 ;
bmpregion_trim_margins ( & column [ 0 ] , colcount , rowcount , 0xf ) ;
/*
printf ( " COL1: pix=%d (%d - %d) \n " , newregion - > r2 - newregion - > r1 + 1 , newregion - > r1 , newregion - > r2 ) ;
*/
if ( column [ 0 ] . r2 - column [ 0 ] . r1 + 1 < min_height_pixels )
status | = 1 ;
column [ 1 ] = ( * region ) ;
column [ 1 ] . r1 = r1 ;
column [ 1 ] . r2 = r2 ;
column [ 1 ] . c1 = cmid ;
column [ 1 ] . c2 = region - > c2 ;
bmpregion_trim_margins ( & column [ 1 ] , colcount , rowcount , 0xf ) ;
/*
printf ( " COL2: pix=%d (%d - %d) \n " , newregion - > r2 - newregion - > r1 + 1 , newregion - > r1 , newregion - > r2 ) ;
*/
if ( column [ 1 ] . r2 - column [ 1 ] . r1 + 1 < min_height_pixels )
status | = 2 ;
/* Make sure gap between columns is not too large */
if ( max_column_gap_inches > = 0.
& & column [ 1 ] . c1 - column [ 0 ] . c2 - 1
> max_column_gap_inches * src_dpi )
status | = 4 ;
return ( status ) ;
}
/*
* * Return 0 if there are dark pixels in the region . NZ otherwise .
*/
static int bmpregion_is_clear ( BMPREGION * region , int * row_black_count ,
double gt_in )
{
int r , c , nc , pt ;
/*
* * row_black_count [ ] doesn ' t necessarily match up to this particular region ' s columns .
* * So if row_black_count [ ] = = 0 , the row is clear , otherwise it has to be counted .
* * because the columns are a subset .
*/
/* nr=region->r2-region->r1+1; */
nc = region - > c2 - region - > c1 + 1 ;
pt = ( int ) ( gt_in * src_dpi * nc + .5 ) ;
if ( pt < 0 )
pt = 0 ;
for ( c = 0 , r = region - > r1 ; r < = region - > r2 ; r + + ) {
if ( r < 0 | | r > = region - > bmp8 - > height )
continue ;
if ( row_black_count [ r ] = = 0 )
continue ;
c + = bmpregion_row_black_count ( region , r ) ;
if ( c > pt )
return ( 0 ) ;
}
/*
printf ( " (%d,%d)-(%d,%d): c=%d, pt=%d (gt_in=%g) \n " ,
region - > c1 , region - > r1 , region - > c2 , region - > r2 , c , pt , gt_in ) ;
*/
return ( 1 + ( int ) 10 * c / pt ) ;
}
static void bmpregion_row_histogram ( BMPREGION * region )
{
static char * funcname = " bmpregion_row_histogram " ;
WILLUSBITMAP * src ;
FILE * out ;
static int * rowcount ;
static int * hist ;
int i , j , nn ;
willus_dmem_alloc_warn ( 6 , ( void * * ) & rowcount ,
( region - > r2 - region - > r1 + 1 ) * sizeof ( int ) , funcname , 10 ) ;
willus_dmem_alloc_warn ( 7 , ( void * * ) & hist ,
( region - > c2 - region - > c1 + 1 ) * sizeof ( int ) , funcname , 10 ) ;
src = region - > bmp8 ;
for ( j = region - > r1 ; j < = region - > r2 ; j + + ) {
unsigned char * p ;
p = bmp_rowptr_from_top ( src , j ) + region - > c1 ;
rowcount [ j - region - > r1 ] = 0 ;
for ( i = region - > c1 ; i < = region - > c2 ; i + + , p + + )
if ( p [ 0 ] < region - > bgcolor )
rowcount [ j - region - > r1 ] + + ;
}
for ( i = region - > c1 ; i < = region - > c2 ; i + + )
hist [ i - region - > c1 ] = 0 ;
for ( i = region - > r1 ; i < = region - > r2 ; i + + )
hist [ rowcount [ i - region - > r1 ] ] + + ;
for ( i = region - > c2 - region - > c1 + 1 ; i > = 0 ; i - - )
if ( hist [ i ] > 0 )
break ;
nn = i ;
out = fopen ( " hist.ep " , " w " ) ;
for ( i = 0 ; i < = nn ; i + + )
fprintf ( out , " %5d %5d \n " , i , hist [ i ] ) ;
fclose ( out ) ;
out = fopen ( " rowcount.ep " , " w " ) ;
for ( i = 0 ; i < region - > r2 - region - > r1 + 1 ; i + + )
fprintf ( out , " %5d %5d \n " , i , rowcount [ i ] ) ;
fclose ( out ) ;
willus_dmem_free ( 7 , ( double * * ) & hist , funcname ) ;
willus_dmem_free ( 6 , ( double * * ) & rowcount , funcname ) ;
}
/*
* * Mark the region
* * mark_flags & 1 : Mark top
* * mark_flags & 2 : Mark bottom
* * mark_flags & 4 : Mark left
* * mark_flags & 8 : Mark right
* *
*/
static void mark_source_page ( BMPREGION * region0 , int caller_id , int mark_flags )
{
static int display_order = 0 ;
int i , n , nn , fontsize , r , g , b , shownum ;
char num [ 16 ] ;
BMPREGION * region , _region ;
BMPREGION * clip , _clip ;
if ( ! show_marked_source )
return ;
if ( region0 = = NULL ) {
display_order = 0 ;
return ;
}
region = & _region ;
( * region ) = ( * region0 ) ;
/* Clip the region w/ignored margins */
clip = & _clip ;
clip - > bmp = region0 - > bmp ;
get_white_margins ( clip ) ;
if ( region - > c1 < clip - > c1 )
region - > c1 = clip - > c1 ;
if ( region - > c2 > clip - > c2 )
region - > c2 = clip - > c2 ;
if ( region - > r1 < clip - > r1 )
region - > r1 = clip - > r1 ;
if ( region - > r2 > clip - > r2 )
region - > r2 = clip - > r2 ;
if ( region - > r2 < = region - > r1 | | region - > c2 < = region - > c1 )
return ;
/* printf("@mark_source_page(display_order=%d)\n",display_order); */
if ( caller_id = = 1 ) {
display_order + + ;
shownum = 1 ;
n = ( int ) ( src_dpi / 60. + 0.5 ) ;
if ( n < 5 )
n = 5 ;
r = 255 ;
g = b = 0 ;
} else if ( caller_id = = 2 ) {
shownum = 0 ;
n = 2 ;
r = 0 ;
g = 0 ;
b = 255 ;
} else if ( caller_id = = 3 ) {
shownum = 0 ;
n = ( int ) ( src_dpi / 80. + 0.5 ) ;
if ( n < 4 )
n = 4 ;
r = 0 ;
g = 255 ;
b = 0 ;
} else if ( caller_id = = 4 ) {
shownum = 0 ;
n = 2 ;
r = 255 ;
g = 0 ;
b = 255 ;
} else {
shownum = 0 ;
n = 2 ;
r = 140 ;
g = 140 ;
b = 140 ;
}
if ( n < 2 )
n = 2 ;
nn = ( region - > c2 + 1 - region - > c1 ) / 2 ;
if ( n > nn )
n = nn ;
nn = ( region - > r2 + 1 - region - > r1 ) / 2 ;
if ( n > nn )
n = nn ;
if ( n < 1 )
n = 1 ;
for ( i = 0 ; i < n ; i + + ) {
int j ;
unsigned char * p ;
if ( mark_flags & 1 ) {
p = bmp_rowptr_from_top ( region - > marked , region - > r1 + i )
+ region - > c1 * 3 ;
for ( j = region - > c1 ; j < = region - > c2 ; j + + , p + = 3 ) {
p [ 0 ] = r ;
p [ 1 ] = g ;
p [ 2 ] = b ;
}
}
if ( mark_flags & 2 ) {
p = bmp_rowptr_from_top ( region - > marked , region - > r2 - i )
+ region - > c1 * 3 ;
for ( j = region - > c1 ; j < = region - > c2 ; j + + , p + = 3 ) {
p [ 0 ] = r ;
p [ 1 ] = g ;
p [ 2 ] = b ;
}
}
if ( mark_flags & 16 ) /* rowbase */
{
p = bmp_rowptr_from_top ( region - > marked , region - > rowbase - i )
+ region - > c1 * 3 ;
for ( j = region - > c1 ; j < = region - > c2 ; j + + , p + = 3 ) {
p [ 0 ] = r ;
p [ 1 ] = g ;
p [ 2 ] = b ;
}
}
if ( mark_flags & 4 )
for ( j = region - > r1 ; j < = region - > r2 ; j + + ) {
p = bmp_rowptr_from_top ( region - > marked , j )
+ ( region - > c1 + i ) * 3 ;
p [ 0 ] = r ;
p [ 1 ] = g ;
p [ 2 ] = b ;
}
if ( mark_flags & 8 )
for ( j = region - > r1 ; j < = region - > r2 ; j + + ) {
p = bmp_rowptr_from_top ( region - > marked , j )
+ ( region - > c2 - i ) * 3 ;
p [ 0 ] = r ;
p [ 1 ] = g ;
p [ 2 ] = b ;
}
}
if ( ! shownum )
return ;
fontsize = region - > c2 - region - > c1 + 1 ;
if ( fontsize > region - > r2 - region - > r1 + 1 )
fontsize = region - > r2 - region - > r1 + 1 ;
fontsize / = 2 ;
if ( fontsize > src_dpi )
fontsize = src_dpi ;
if ( fontsize < 5 )
return ;
fontrender_set_typeface ( " helvetica-bold " ) ;
fontrender_set_fgcolor ( r , g , b ) ;
fontrender_set_bgcolor ( 255 , 255 , 255 ) ;
fontrender_set_pixel_size ( fontsize ) ;
fontrender_set_justification ( 4 ) ;
fontrender_set_or ( 1 ) ;
sprintf ( num , " %d " , display_order ) ;
fontrender_render ( region - > marked , ( double ) ( region - > c1 + region - > c2 ) / 2. ,
( double ) ( region - > marked - > height - ( ( region - > r1 + region - > r2 ) / 2. ) ) ,
num , 0 , NULL ) ;
/* printf(" done mark_source_page.\n"); */
}
/*
* * Input : A generic rectangular region from the source file . It will not
* * be checked for multiple columns , but the text may be wrapped
* * ( controlled by allow_text_wrapping input ) .
* *
* * force_scale = = - 2 : Use same scale for entire column - - fit to device
* *
* * This function looks for vertical gaps in the region and breaks it at
* * the widest ones ( if there are significantly wider ones ) .
* *
*/
static void bmpregion_vertically_break ( BMPREGION * region ,
MASTERINFO * masterinfo , int allow_text_wrapping , double force_scale ,
int * colcount , int * rowcount , PAGEINFO * pageinfo , int colgap_pixels ,
int ncols )
{
static int ncols_last = - 1 ;
int regcount , i , i1 , biggap , revert , trim_flags , allow_vertical_breaks ;
int justification_flags , caller_id , marking_flags , rbdelta ;
// int trim_left_and_right;
BMPREGION * bregion , _bregion ;
BREAKINFO * breakinfo , _breakinfo ;
double region_width_inches , region_height_inches ;
# if (WILLUSDEBUGX & 1)
printf ( " \n \n @bmpregion_vertically_break. colgap_pixels=%d \n \n " , colgap_pixels ) ;
# endif
trim_flags = 0xf ;
allow_vertical_breaks = 1 ;
justification_flags = 0x8f ; /* Don't know region justification status yet. Use user settings. */
rbdelta = - 1 ;
breakinfo = & _breakinfo ;
breakinfo - > textrow = NULL ;
breakinfo_alloc ( 102 , breakinfo , region - > r2 - region - > r1 + 1 ) ;
bmpregion_find_vertical_breaks ( region , breakinfo , colcount , rowcount , - 1.0 ) ;
/* Should there be a check for breakinfo->n==0 here? */
/* Don't think it breaks anything to let it go. -- 6-11-12 */
# if (WILLUSDEBUGX & 2)
breakinfo_echo ( breakinfo ) ;
# endif
breakinfo_remove_small_rows ( breakinfo , 0.25 , 0.5 , region , colcount ,
rowcount ) ;
# if (WILLUSDEBUGX & 2)
breakinfo_echo ( breakinfo ) ;
# endif
breakinfo - > centered = bmpregion_is_centered ( region , breakinfo , 0 ,
breakinfo - > n - 1 , NULL ) ;
# if (WILLUSDEBUGX & 2)
breakinfo_echo ( breakinfo ) ;
# endif
/*
newregion = & _newregion ;
for ( i = 0 ; i < breakinfo - > n ; i + + )
{
( * newregion ) = ( * region ) ;
newregion - > r1 = breakinfo - > textrow [ i ] . r1 ;
newregion - > r2 = breakinfo - > textrow [ i ] . r2 ;
bmpregion_add ( newregion , breakinfo , masterinfo , allow_text_wrapping , force_scale , 0 , 1 ,
colcount , rowcount , pageinfo , 0 , 0xf ) ;
}
breakinfo_free ( breakinfo ) ;
return ;
*/
/*
if ( ! vertical_breaks )
{
caller_id = 100 ;
marking_flags = 0 ;
bmpregion_add ( region , breakinfo , masterinfo , allow_text_wrapping , trim_flags ,
allow_vertical_breaks , force_scale , justification_flags ,
caller_id , colcount , rowcount , pageinfo , marking_flags , rbdelta ) ;
breakinfo_free ( breakinfo ) ;
return ;
}
*/
/* Red, numbered region */
mark_source_page ( region , 1 , 0xf ) ;
bregion = & _bregion ;
if ( debug ) {
if ( ! allow_text_wrapping )
printf (
" @bmpregion_vertically_break (no break) (%d,%d) - (%d,%d) (scale=%g) \n " ,
region - > c1 , region - > r1 , region - > c2 , region - > r2 ,
force_scale ) ;
else
printf (
" @bmpregion_vertically_break (allow break) (%d,%d) - (%d,%d) (scale=%g) \n " ,
region - > c1 , region - > r1 , region - > c2 , region - > r2 ,
force_scale ) ;
}
/*
* * Tag blank rows and columns
*/
if ( vertical_break_threshold < 0. | | breakinfo - > n < 6 )
biggap = - 1. ;
else {
int gap_median ;
/*
int rowheight_median ;
breakinfo_sort_by_rowheight ( breakinfo ) ;
rowheight_median = breakinfo - > textrow [ breakinfo - > n / 2 ] . rowheight ;
*/
# ifdef WILLUSDEBUG
for ( i = 0 ; i < breakinfo - > n ; i + + )
printf ( " gap[%d]=%d \n " , i , breakinfo - > textrow [ i ] . gap ) ;
# endif
breakinfo_sort_by_gap ( breakinfo ) ;
gap_median = breakinfo - > textrow [ breakinfo - > n / 2 ] . gap ;
# ifdef WILLUSDEBUG
printf ( " median=%d \n " , gap_median ) ;
# endif
biggap = gap_median * vertical_break_threshold ;
breakinfo_sort_by_row_position ( breakinfo ) ;
}
# ifdef WILLUSDEBUG
printf ( " biggap=%d \n " , biggap ) ;
# endif
region_width_inches = ( double ) ( region - > c2 - region - > c1 + 1 ) / src_dpi ;
region_height_inches = ( double ) ( region - > r2 - region - > r1 + 1 ) / src_dpi ;
/*
trim_left_and_right = 1 ;
if ( region_width_inches < = max_region_width_inches )
trim_left_and_right = 0 ;
*/
/*
printf ( " force_scale=%g, rwi = %g, rwi/mrwi = %g, rhi = %g \n " ,
force_scale ,
region_width_inches ,
region_width_inches / max_region_width_inches ,
region_height_inches ) ;
*/
if ( force_scale < - 1.5 & & region_width_inches > MIN_REGION_WIDTH_INCHES
& & region_width_inches / max_region_width_inches < 1.25
& & region_height_inches > 0.5 ) {
revert = 1 ;
force_scale = - 1.0 ;
fit_column_to_screen ( region_width_inches ) ;
// trim_left_and_right = 0;
allow_text_wrapping = 0 ;
} else
revert = 0 ;
/* Add the regions (broken vertically) */
caller_id = 1 ;
/*
if ( trim_left_and_right )
trim_flags = 0xf ;
else
trim_flags = 0xc ;
*/
trim_flags = 0xf ;
for ( regcount = i1 = i = 0 ; i1 < breakinfo - > n ; i + + ) {
int i2 ;
i2 = i < breakinfo - > n ? i : breakinfo - > n - 1 ;
if ( i > = breakinfo - > n
| | ( biggap > 0. & & breakinfo - > textrow [ i2 ] . gap > = biggap ) ) {
int j , c1 , c2 , nc , nowrap ;
double regwidth , ar1 , rh1 ;
// printf("CALLER 1: i1=%d, i2=%d (breakinfo->n=%d)\n",i1,i2,breakinfo->n);
( * bregion ) = ( * region ) ;
bregion - > r1 = breakinfo - > textrow [ i1 ] . r1 ;
bregion - > r2 = breakinfo - > textrow [ i2 ] . r2 ;
c1 = breakinfo - > textrow [ i1 ] . c1 ;
c2 = breakinfo - > textrow [ i1 ] . c2 ;
nc = c2 - c1 + 1 ;
if ( nc < = 0 )
nc = 1 ;
rh1 = ( double ) ( breakinfo - > textrow [ i1 ] . r2
- breakinfo - > textrow [ i1 ] . r1 + 1 ) / src_dpi ;
ar1 = ( double ) ( breakinfo - > textrow [ i1 ] . r2
- breakinfo - > textrow [ i1 ] . r1 + 1 ) / nc ;
for ( j = i1 + 1 ; j < = i2 ; j + + ) {
if ( c1 > breakinfo - > textrow [ j ] . c1 )
c1 = breakinfo - > textrow [ j ] . c1 ;
if ( c2 < breakinfo - > textrow [ j ] . c2 )
c2 = breakinfo - > textrow [ j ] . c2 ;
}
regwidth = ( double ) ( c2 - c1 + 1 ) / src_dpi ;
marking_flags = ( i1 = = 0 ? 0 : 1 )
| ( i2 = = breakinfo - > n - 1 ? 0 : 2 ) ;
/* Green */
mark_source_page ( bregion , 3 , marking_flags ) ;
nowrap = ( ( regwidth < = max_region_width_inches
& & allow_text_wrapping < 2 )
| | ( ar1 > no_wrap_ar_limit
& & rh1 > no_wrap_height_limit_inches ) ) ;
/*
* * If between regions , or if the next region isn ' t going to be
* * wrapped , or if the next region starts a different number of
* * columns than before , then " flush and gap. "
*/
if ( regcount > 0 | | just_flushed_internal | | nowrap
| | ( ncols_last > 0 & & ncols_last ! = ncols ) ) {
int gap ;
# ifdef WILLUSDEBUG
printf ( " wrapflush1 \n " ) ;
# endif
if ( ! just_flushed_internal )
wrapbmp_flush ( masterinfo , 0 , pageinfo , 0 ) ;
gap = regcount = = 0 ?
colgap_pixels : breakinfo - > textrow [ i1 - 1 ] . gap ;
if ( regcount = = 0 & & beginning_gap_internal > 0 ) {
if ( last_h5050_internal > 0 ) {
if ( fabs (
1.
- ( double ) breakinfo - > textrow [ i1 ] . h5050
/ last_h5050_internal ) > .1 )
dst_add_gap_src_pixels ( " Col/Page break " , masterinfo ,
colgap_pixels ) ;
last_h5050_internal = - 1 ;
}
gap = beginning_gap_internal ;
beginning_gap_internal = - 1 ;
}
dst_add_gap_src_pixels ( " Vert break " , masterinfo , gap ) ;
} else {
if ( regcount = = 0 & & beginning_gap_internal < 0 )
beginning_gap_internal = colgap_pixels ;
}
bmpregion_add ( bregion , breakinfo , masterinfo , allow_text_wrapping ,
trim_flags , allow_vertical_breaks , force_scale ,
justification_flags , caller_id , colcount , rowcount ,
pageinfo , marking_flags , rbdelta ) ;
regcount + + ;
i1 = i2 + 1 ;
}
}
ncols_last = ncols ;
if ( revert )
restore_output_dpi ( ) ;
breakinfo_free ( 102 , breakinfo ) ;
}
/*
* *
* * MAIN BITMAP REGION ADDING FUNCTION
* *
* * NOTE : This function calls itself recursively !
* *
* * Input : A generic rectangular region from the source file . It will not
* * be checked for multiple columns , but the text may be wrapped
* * ( controlled by allow_text_wrapping input ) .
* *
* * First , excess margins are trimmed off of the region .
* *
* * Then , if the resulting trimmed region is wider than the max desirable width
* * and allow_text_wrapping is non - zero , then the
* * bmpregion_analyze_justification_and_line_spacing ( ) function is called .
* * Otherwise the region is scaled to fit and added to the master set of pages .
* *
* * justification_flags
* * Bits 6 - 7 : 0 = document is not fully justified
* * 1 = document is fully justified
* * 2 = don ' t know document justification yet
* * Bits 4 - 5 : 0 = Use user settings
* * 1 = fully justify
* * 2 = do not fully justify
* * Bits 2 - 3 : 0 = document is left justified
* * 1 = document is centered
* * 2 = document is right justified
* * 3 = don ' t know document justification yet
* * Bits 0 - 1 : 0 = left justify document
* * 1 = center document
* * 2 = right justify document
* * 3 = Use user settings
* *
* * force_scale = - 2.0 : Fit column width to display width
* * force_scale = - 1.0 : Use output dpi unless the region doesn ' t fit .
* * In that case , scale it down until it fits .
* * force_scale > 0.0 : Scale region by force_scale .
* *
* * mark_flags & 1 : Mark top
* * mark_flags & 2 : Mark bottom
* * mark_flags & 4 : Mark left
* * mark_flags & 8 : Mark right
* *
* * trim_flags & 0x80 : Do NOT re - trim no matter what .
* *
*/
static void bmpregion_add ( BMPREGION * region , BREAKINFO * breakinfo ,
MASTERINFO * masterinfo , int allow_text_wrapping , int trim_flags ,
int allow_vertical_breaks , double force_scale , int justification_flags ,
int caller_id , int * colcount , int * rowcount , PAGEINFO * pageinfo ,
int mark_flags , int rowbase_delta )
{
int w , wmax , i , nc , nr , h , bpp , tall_region ;
double region_width_inches ;
WILLUSBITMAP * bmp , _bmp ;
BMPREGION * newregion , _newregion ;
newregion = & _newregion ;
( * newregion ) = ( * region ) ;
# if (WILLUSDEBUGX & 1)
printf ( " @bmpregion_add (%d,%d) - (%d,%d) \n " , region - > c1 , region - > r1 , region - > c2 , region - > r2 ) ;
printf ( " trimflags = %X \n " , trim_flags ) ;
# endif
if ( debug ) {
if ( ! allow_text_wrapping )
printf ( " @bmpregion_add (no break) (%d,%d) - (%d,%d) (scale=%g) \n " ,
region - > c1 , region - > r1 , region - > c2 , region - > r2 ,
force_scale ) ;
else
printf (
" @bmpregion_add (allow break) (%d,%d) - (%d,%d) (scale=%g) \n " ,
region - > c1 , region - > r1 , region - > c2 , region - > r2 ,
force_scale ) ;
}
/*
* * Tag blank rows and columns and trim the blank margins off
* * trimflags = 0xf for all margin trim .
* * trimflags = 0xc for just top and bottom margins .
*/
bmpregion_trim_margins ( newregion , colcount , rowcount , trim_flags ) ;
# if (WILLUSDEBUGX & 1)
printf ( " After trim: (%d,%d) - (%d,%d) \n " , newregion - > c1 , newregion - > r1 , newregion - > c2 , newregion - > r2 ) ;
# endif
nc = newregion - > c2 - newregion - > c1 + 1 ;
nr = newregion - > r2 - newregion - > r1 + 1 ;
// printf("nc=%d, nr=%d\n",nc,nr);
if ( verbose ) {
printf ( " row range adjusted to %d - %d \n " , newregion - > r1 ,
newregion - > r2 ) ;
printf ( " col range adjusted to %d - %d \n " , newregion - > c1 ,
newregion - > c2 ) ;
}
if ( nc < = 5 | | nr < = 1 )
return ;
region_width_inches = ( double ) nc / src_dpi ;
// printf("regwidth = %g in\n",region_width_inches);
/* Use untrimmed region left/right if possible */
if ( caller_id = = 1 & & region_width_inches < = max_region_width_inches ) {
int trimleft , trimright ;
int maxpix , dpix ;
maxpix = ( int ) ( max_region_width_inches * src_dpi + .5 ) ;
# if (WILLUSDEBUGX & 1)
printf ( " Trimming. C's = %4d %4d %4d %4d \n " , region - > c1 , newregion - > c1 , newregion - > c2 , region - > c2 ) ;
printf ( " maxpix = %d, regwidth = %d \n " , maxpix , region - > c2 - region - > c1 + 1 ) ;
# endif
if ( maxpix > ( region - > c2 - region - > c1 + 1 ) )
maxpix = region - > c2 - region - > c1 + 1 ;
// printf(" maxpix = %d\n",maxpix);
dpix = ( region - > c2 - region - > c1 + 1 - maxpix ) / 2 ;
// printf(" dpix = %d\n",dpix);
trimright = region - > c2 - newregion - > c2 ;
trimleft = newregion - > c1 - region - > c1 ;
if ( trimleft < trimright ) {
if ( trimleft > dpix )
newregion - > c1 = region - > c1 + dpix ;
newregion - > c2 = newregion - > c1 + maxpix - 1 ;
} else {
if ( trimright > dpix )
newregion - > c2 = region - > c2 - dpix ;
newregion - > c1 = newregion - > c2 - maxpix + 1 ;
}
if ( newregion - > c1 < region - > c1 )
newregion - > c1 = region - > c1 ;
if ( newregion - > c2 > region - > c2 )
newregion - > c2 = region - > c2 ;
nc = newregion - > c2 - newregion - > c1 + 1 ;
# if (WILLUSDEBUGX & 1)
printf ( " Post Trim. C's = %4d %4d %4d %4d \n " , region - > c1 , newregion - > c1 , newregion - > c2 , region - > c2 ) ;
# endif
region_width_inches = ( double ) nc / src_dpi ;
}
/*
* * Try breaking the region into smaller horizontal pieces ( wrap text lines )
*/
/*
printf ( " allow_text_wrapping=%d, region_width_inches=%g, max_region_width_inches=%g \n " ,
allow_text_wrapping , region_width_inches , max_region_width_inches ) ;
*/
/* New in v1.50, if allow_text_wrapping==2, unwrap short lines. */
if ( allow_text_wrapping = = 2
| | ( allow_text_wrapping = = 1
& & region_width_inches > max_region_width_inches ) ) {
bmpregion_analyze_justification_and_line_spacing ( newregion , breakinfo ,
masterinfo , colcount , rowcount , pageinfo , 1 , force_scale ) ;
return ;
}
/*
* * If allowed , re - submit each vertical region individually
*/
if ( allow_vertical_breaks ) {
bmpregion_analyze_justification_and_line_spacing ( newregion , breakinfo ,
masterinfo , colcount , rowcount , pageinfo , 0 , force_scale ) ;
return ;
}
/* AT THIS POINT, BITMAP IS NOT TO BE BROKEN UP HORIZONTALLY OR VERTICALLY */
/* (IT CAN STILL BE FULLY JUSTIFIED IF ALLOWED.) */
/*
* * Scale region to fit the destination device width and add to the master bitmap .
* *
* *
* * Start by copying source region to new bitmap
* *
*/
// printf("c1=%d\n",newregion->c1);
/* Is it a figure? */
tall_region = ( double ) ( newregion - > r2 - newregion - > r1 + 1 ) / src_dpi
> = dst_min_figure_height_in ;
/* Re-trim left and right? */
if ( ( trim_flags & 0x80 ) = = 0 ) {
/* If tall region and figure justification turned on ... */
if ( ( tall_region & & dst_figure_justify > = 0 )
/* ... or if centered region ... */
| | ( ( trim_flags & 3 ) ! = 3
& & ( ( justification_flags & 3 ) = = 1
| | ( ( justification_flags & 3 ) = = 3
& & ( dst_justify = = 1
| | ( dst_justify < 0
& & ( justification_flags
& 0xc ) = = 4 ) ) ) ) ) ) {
bmpregion_trim_margins ( newregion , colcount , rowcount , 0x3 ) ;
nc = newregion - > c2 - newregion - > c1 + 1 ;
region_width_inches = ( double ) nc / src_dpi ;
}
}
# if (WILLUSDEBUGX & 1)
aprintf ( " atomic region: " ANSI_CYAN " %.2f x %.2f in " ANSI_NORMAL " c1=%d, (%d x %d) (rbdel=%d) just=0x%02X \n " ,
( double ) ( newregion - > c2 - newregion - > c1 + 1 ) / src_dpi ,
( double ) ( newregion - > r2 - newregion - > r1 + 1 ) / src_dpi ,
newregion - > c1 ,
( newregion - > c2 - newregion - > c1 + 1 ) ,
( newregion - > r2 - newregion - > r1 + 1 ) ,
rowbase_delta , justification_flags ) ;
# endif
/* Copy atomic region into bmp */
bmp = & _bmp ;
bmp_init ( bmp ) ;
bmp - > width = nc ;
bmp - > height = nr ;
if ( dst_color )
bmp - > bpp = 24 ;
else {
bmp - > bpp = 8 ;
for ( i = 0 ; i < 256 ; i + + )
bmp - > red [ i ] = bmp - > blue [ i ] = bmp - > green [ i ] = i ;
}
bmp_alloc ( bmp ) ;
bpp = dst_color ? 3 : 1 ;
// printf("r1=%d, r2=%d\n",newregion->r1,newregion->r2);
for ( i = newregion - > r1 ; i < = newregion - > r2 ; i + + ) {
unsigned char * psrc , * pdst ;
pdst = bmp_rowptr_from_top ( bmp , i - newregion - > r1 ) ;
psrc = bmp_rowptr_from_top ( dst_color ? newregion - > bmp : newregion - > bmp8 ,
i ) + bpp * newregion - > c1 ;
memcpy ( pdst , psrc , nc * bpp ) ;
}
/*
* * Now scale to appropriate destination size .
* *
* * force_scale is used to maintain uniform scaling so that
* * most of the regions are scaled at the same value .
* *
* * force_scale = - 2.0 : Fit column width to display width
* * force_scale = - 1.0 : Use output dpi unless the region doesn ' t fit .
* * In that case , scale it down until it fits .
* * force_scale > 0.0 : Scale region by force_scale .
* *
*/
/* Max viewable pixel width on device screen */
wmax = ( int ) ( masterinfo - > bmp . width - ( dst_marleft + dst_marright ) * dst_dpi
+ 0.5 ) ;
if ( force_scale > 0. )
w = ( int ) ( force_scale * bmp - > width + 0.5 ) ;
else {
if ( region_width_inches < max_region_width_inches )
w = ( int ) ( region_width_inches * dst_dpi + .5 ) ;
else
w = wmax ;
}
/* Special processing for tall regions (likely figures) */
if ( tall_region & & w < wmax & & dst_fit_to_page ! = 0 ) {
if ( dst_fit_to_page < 0 )
w = wmax ;
else {
w = ( int ) ( w * ( 1. + ( double ) dst_fit_to_page / 100. ) + 0.5 ) ;
if ( w > wmax )
w = wmax ;
}
}
h = ( int ) ( ( ( double ) w / bmp - > width ) * bmp - > height + .5 ) ;
/*
* * If scaled dimensions are finite , add to master bitmap .
*/
if ( w > 0 & & h > 0 ) {
WILLUSBITMAP * tmp , _tmp ;
int nocr ;
last_scale_factor_internal = ( double ) w / bmp - > width ;
# ifdef HAVE_OCR
if ( dst_ocr )
{
nocr = ( int ) ( ( double ) bmp - > width / w + 0.5 ) ;
if ( nocr < 1 )
nocr = 1 ;
if ( nocr > 10 )
nocr = 10 ;
w * = nocr ;
h * = nocr ;
}
else
# endif
nocr = 1 ;
tmp = & _tmp ;
bmp_init ( tmp ) ;
bmp_resample ( tmp , bmp , ( double ) 0. , ( double ) 0. , ( double ) bmp - > width ,
( double ) bmp - > height , w , h ) ;
bmp_free ( bmp ) ;
/*
{
static int nn = 0 ;
char filename [ 256 ] ;
sprintf ( filename , " xxx%02d.png " , nn + + ) ;
bmp_write ( tmp , filename , stdout , 100 ) ;
}
*/
/*
* * Add scaled bitmap to destination .
*/
/* Allocate more rows if necessary */
while ( masterinfo - > rows + tmp - > height / nocr > masterinfo - > bmp . height )
bmp_more_rows ( & masterinfo - > bmp , 1.4 , 255 ) ;
/* Check special justification for tall regions */
if ( tall_region & & dst_figure_justify > = 0 )
justification_flags = dst_figure_justify ;
bmp_src_to_dst ( masterinfo , tmp , justification_flags , region - > bgcolor ,
nocr , ( int ) ( ( double ) src_dpi * tmp - > width / bmp - > width + .5 ) ) ;
bmp_free ( tmp ) ;
}
/* Store delta to base of text row (used by wrapbmp_flush()) */
last_rowbase_internal = rowbase_delta ;
/* .05 was .072 in v1.35 */
/* dst_add_gap(&masterinfo->bmp,&masterinfo->rows,0.05); */
/*
if ( revert )
restore_output_dpi ( ) ;
*/
}
static void dst_add_gap_src_pixels ( char * caller , MASTERINFO * masterinfo ,
int pixels )
{
double gap_inches ;
/*
aprintf ( " %s " ANSI_GREEN " dst_add " ANSI_NORMAL " %.3f in (%d pix) \n " , caller , ( double ) pixels / src_dpi , pixels ) ;
*/
if ( last_scale_factor_internal < 0. )
gap_inches = ( double ) pixels / src_dpi ;
else
gap_inches = ( double ) pixels * last_scale_factor_internal / dst_dpi ;
gap_inches * = vertical_multiplier ;
if ( gap_inches > max_vertical_gap_inches )
gap_inches = max_vertical_gap_inches ;
dst_add_gap ( masterinfo , gap_inches ) ;
}
static void dst_add_gap ( MASTERINFO * masterinfo , double inches )
{
int n , bw ;
unsigned char * p ;
n = ( int ) ( inches * dst_dpi + .5 ) ;
if ( n < 1 )
n = 1 ;
while ( masterinfo - > rows + n > masterinfo - > bmp . height )
bmp_more_rows ( & masterinfo - > bmp , 1.4 , 255 ) ;
bw = bmp_bytewidth ( & masterinfo - > bmp ) * n ;
p = bmp_rowptr_from_top ( & masterinfo - > bmp , masterinfo - > rows ) ;
memset ( p , 255 , bw ) ;
masterinfo - > rows + = n ;
}
/*
* *
* * Add already - scaled source bmp to destination bmp .
* * Source bmp may be narrower than destination - - if so , it may be fully justifed .
* * dst = destination bitmap
* * src = source bitmap
* * dst and src bpp must match !
* * All rows of src are applied to masterinfo - > bmp starting at row masterinfo - > rows
* * Full justification is done if requested .
* *
*/
static void bmp_src_to_dst ( MASTERINFO * masterinfo , WILLUSBITMAP * src ,
int justification_flags , int whitethresh , int nocr , int dpi )
{
WILLUSBITMAP * src1 , _src1 ;
WILLUSBITMAP * tmp ;
# ifdef HAVE_OCR
WILLUSBITMAP _tmp ;
OCRWORDS _words , * words ;
# endif
int dw , dw2 ;
int i , srcbytespp , srcbytewidth , go_full ;
int destwidth , destx0 , just ;
if ( src - > width < = 0 | | src - > height < = 0 )
return ;
/*
printf ( " @bmp_src_to_dst. dst->bpp=%d, src->bpp=%d, src=%d x %d \n " , masterinfo - > bmp . bpp , src - > bpp , src - > width , src - > height ) ;
*/
/*
{
static int count = 0 ;
static char filename [ 256 ] ;
printf ( " @bmp_src_to_dst... \n " ) ;
sprintf ( filename , " src%05d.png " , count + + ) ;
bmp_write ( src , filename , stdout , 100 ) ;
}
*/
/*
if ( fulljust & & dst_fulljustify )
printf ( " srcbytespp=%d, srcbytewidth=%d, destwidth=%d, destx0=%d, destbytewidth=%d \n " ,
srcbytespp , srcbytewidth , destwidth , destx0 , dstbytewidth ) ;
*/
/* Determine what justification to use */
/* Left? */
if ( ( justification_flags & 3 ) = = 0 /* Mandatory left just */
| | ( ( justification_flags & 3 ) = = 3 /* Use user settings */
& & ( dst_justify = = 0
| | ( dst_justify < 0
& & ( justification_flags & 0xc ) = = 0 ) ) ) )
just = 0 ;
else if ( ( justification_flags & 3 ) = = 2
| | ( ( justification_flags & 3 ) = = 3
& & ( dst_justify = = 2
| | ( dst_justify < 0
& & ( justification_flags & 0xc ) = = 8 ) ) ) )
just = 2 ;
else
just = 1 ;
/* Full justification? */
destwidth = ( int ) ( masterinfo - > bmp . width
- ( dst_marleft + dst_marright ) * dst_dpi + .5 ) ;
go_full = ( destwidth * nocr > src - > width
& & ( ( ( justification_flags & 0x30 ) = = 0x10 )
| | ( ( justification_flags & 0x30 ) = = 0 // Use user settings
& & ( dst_fulljustify = = 1
| | ( dst_fulljustify < 0
& & ( justification_flags & 0xc0 )
= = 0x40 ) ) ) ) ) ;
/* Put fully justified text into src1 bitmap */
if ( go_full ) {
src1 = & _src1 ;
bmp_init ( src1 ) ;
bmp_fully_justify ( src1 , src , nocr * destwidth , whitethresh , just ) ;
} else
src1 = src ;
# if (WILLUSDEBUGX & 1)
printf ( " @bmp_src_to_dst: jflags=0x%02X just=%d, go_full=%d \n " , justification_flags , just , go_full ) ;
printf ( " destx0=%d, destwidth=%d, src->width=%d \n " , destx0 , destwidth , src - > width ) ;
# endif
# ifdef HAVE_OCR
if ( dst_ocr )
{
/* Run OCR on the bitmap */
words = & _words ;
ocrwords_init ( words ) ;
ocrwords_fill_in ( words , src1 , whitethresh , dpi ) ;
/* Scale bitmap and word positions to destination size */
if ( nocr > 1 )
{
tmp = & _tmp ;
bmp_init ( tmp ) ;
bmp_integer_resample ( tmp , src1 , nocr ) ;
ocrwords_int_scale ( words , nocr ) ;
}
else
tmp = src1 ;
}
else
# endif
tmp = src1 ;
/*
printf ( " writing... \n " ) ;
ocrwords_box ( words , tmp ) ;
bmp_write ( tmp , " out.png " , stdout , 100 ) ;
exit ( 10 ) ;
*/
destx0 = ( int ) ( dst_marleft * dst_dpi + .5 ) ;
if ( just = = 0 )
dw = destx0 ;
else if ( just = = 1 )
dw = destx0 + ( destwidth - tmp - > width ) / 2 ;
else
dw = destx0 + destwidth - tmp - > width ;
if ( dw < 0 )
dw = 0 ;
/* Add OCR words to destination list */
# ifdef HAVE_OCR
if ( dst_ocr )
{
ocrwords_offset ( words , dw , masterinfo - > rows ) ;
ocrwords_concatenate ( dst_ocrwords , words ) ;
ocrwords_free ( words ) ;
}
# endif
/* Add tmp bitmap to dst */
srcbytespp = tmp - > bpp = = 24 ? 3 : 1 ;
srcbytewidth = tmp - > width * srcbytespp ;
dw2 = masterinfo - > bmp . width - tmp - > width - dw ;
dw * = srcbytespp ;
dw2 * = srcbytespp ;
for ( i = 0 ; i < tmp - > height ; i + + , masterinfo - > rows + + ) {
unsigned char * pdst , * psrc ;
psrc = bmp_rowptr_from_top ( tmp , i ) ;
pdst = bmp_rowptr_from_top ( & masterinfo - > bmp , masterinfo - > rows ) ;
memset ( pdst , 255 , dw ) ;
pdst + = dw ;
memcpy ( pdst , psrc , srcbytewidth ) ;
pdst + = srcbytewidth ;
memset ( pdst , 255 , dw2 ) ;
}
# ifdef HAVE_OCR
if ( dst_ocr & & nocr > 1 )
bmp_free ( tmp ) ;
# endif
if ( go_full )
bmp_free ( src1 ) ;
}
/*
* * Spread words out in src and put into jbmp at scaling nocr
* * In case the text can ' t be expanded enough ,
* * just = 0 ( left justify ) , 1 ( center ) , 2 ( right justify )
*/
static void bmp_fully_justify ( WILLUSBITMAP * jbmp , WILLUSBITMAP * src ,
int jbmpwidth , int whitethresh , int just )
{
BMPREGION srcregion ;
BREAKINFO * colbreaks , _colbreaks ;
WILLUSBITMAP gray ;
int * gappos , * gapsize ;
int i , srcbytespp , srcbytewidth , jbmpbytewidth , newwidth , destx0 , ng ;
static char * funcname = " bmp_fully_justify " ;
/*
{
char filename [ 256 ] ;
count + + ;
sprintf ( filename , " out%03d.png " , count ) ;
bmp_write ( src , filename , stdout , 100 ) ;
}
*/
/* Init/allocate destination bitmap */
jbmp - > width = jbmpwidth ;
jbmp - > height = src - > height ;
jbmp - > bpp = src - > bpp ;
if ( jbmp - > bpp = = 8 )
for ( i = 0 ; i < 256 ; i + + )
jbmp - > red [ i ] = jbmp - > green [ i ] = jbmp - > blue [ i ] = i ;
bmp_alloc ( jbmp ) ;
/* Find breaks in the text row */
colbreaks = & _colbreaks ;
colbreaks - > textrow = NULL ;
srcregion . bgcolor = whitethresh ;
srcregion . c1 = 0 ;
srcregion . c2 = src - > width - 1 ;
srcregion . r1 = 0 ;
srcregion . r2 = src - > height - 1 ;
srcbytespp = src - > bpp = = 24 ? 3 : 1 ;
if ( srcbytespp = = 3 ) {
srcregion . bmp = src ;
srcregion . bmp8 = & gray ;
bmp_init ( srcregion . bmp8 ) ;
bmp_convert_to_greyscale_ex ( srcregion . bmp8 , src ) ;
} else {
srcregion . bmp = src ;
srcregion . bmp8 = src ;
}
breakinfo_alloc ( 103 , colbreaks , src - > width ) ;
{
int * colcount , * rowcount ;
colcount = rowcount = NULL ;
willus_dmem_alloc_warn ( 8 , ( void * * ) & colcount ,
sizeof ( int ) * ( src - > width + src - > height ) , funcname , 10 ) ;
rowcount = & colcount [ src - > width ] ;
bmpregion_one_row_find_breaks ( & srcregion , colbreaks , colcount , rowcount ,
1 ) ;
willus_dmem_free ( 8 , ( double * * ) & colcount , funcname ) ;
}
if ( srcbytespp = = 3 )
bmp_free ( srcregion . bmp8 ) ;
ng = colbreaks - > n - 1 ;
gappos = NULL ;
if ( ng > 0 ) {
int maxsize , ms2 , mingap , j ;
willus_dmem_alloc_warn ( 9 , ( void * * ) & gappos , ( 2 * sizeof ( int ) ) * ng ,
funcname , 10 ) ;
gapsize = & gappos [ ng ] ;
for ( i = 0 ; i < ng ; i + + ) {
gappos [ i ] = colbreaks - > textrow [ i ] . c2 + 1 ;
gapsize [ i ] = colbreaks - > textrow [ i ] . gap ;
}
/* Take only the largest group of gaps */
for ( maxsize = i = 0 ; i < ng ; i + + )
if ( maxsize < gapsize [ i ] )
maxsize = gapsize [ i ] ;
mingap = srcregion . lcheight * word_spacing ;
if ( mingap < 2 )
mingap = 2 ;
if ( maxsize > mingap )
maxsize = mingap ;
ms2 = maxsize / 2 ;
for ( i = j = 0 ; i < ng ; i + + )
if ( gapsize [ i ] > ms2 ) {
if ( j ! = i ) {
gapsize [ j ] = gapsize [ i ] ;
gappos [ j ] = gappos [ i ] ;
}
j + + ;
}
ng = j ;
/* Figure out total pixel expansion */
newwidth = src - > width * 1.25 ;
if ( newwidth > jbmp - > width )
newwidth = jbmp - > width ;
} else
newwidth = src - > width ;
breakinfo_free ( 103 , colbreaks ) ;
/* Starting column in destination bitmap */
if ( just = = 1 )
destx0 = ( jbmp - > width - newwidth ) / 2 ;
else if ( just = = 2 )
destx0 = ( jbmp - > width - newwidth ) ;
else
destx0 = 0 ;
jbmpbytewidth = bmp_bytewidth ( jbmp ) ;
srcbytewidth = bmp_bytewidth ( src ) ;
/* Clear entire fully justified bitmap */
memset ( bmp_rowptr_from_top ( jbmp , 0 ) , 255 , jbmpbytewidth * jbmp - > height ) ;
/* Spread out source pieces to fully justify them */
for ( i = 0 ; i < = ng ; i + + ) {
int j , dx0 , dx , sx0 ;
unsigned char * pdst , * psrc ;
dx = i < ng ?
( i > 0 ? gappos [ i ] - gappos [ i - 1 ] : gappos [ i ] + 1 ) :
( i > 0 ? src - > width - ( gappos [ i - 1 ] + 1 ) : src - > width ) ;
dx * = srcbytespp ;
sx0 = i = = 0 ? 0 : ( gappos [ i - 1 ] + 1 ) ;
dx0 = destx0 + sx0 + ( i = = 0 ? 0 : ( newwidth - src - > width ) * i / ng ) ;
psrc = bmp_rowptr_from_top ( src , 0 ) + sx0 * srcbytespp ;
pdst = bmp_rowptr_from_top ( jbmp , 0 ) + dx0 * srcbytespp ;
for ( j = 0 ; j < src - > height ; j + + , pdst + = jbmpbytewidth , psrc + =
srcbytewidth )
memcpy ( pdst , psrc , dx ) ;
}
if ( gappos ! = NULL )
willus_dmem_free ( 9 , ( double * * ) & gappos , funcname ) ;
}
/*
* * flags & 1 : trim c1
* * flags & 2 : trim c2
* * flags & 4 : trim r1
* * flags & 8 : trim r2
* * flags & 16 : Find rowbase , font size , etc .
* *
* * Row base is where row dist crosses 50 % on r2 side .
* * Font size is where row dist crosses 5 % on other side ( r1 side ) .
* * Lowercase font size is where row dist crosses 50 % on r1 side .
* *
* * For 12 pt font :
* * Single spacing is 14.66 pts ( Calibri ) , 13.82 pts ( Times ) , 13.81 pts ( Arial )
* * Size of cap letter is 7.7 pts ( Calibri ) , 8.1 pts ( Times ) , 8.7 pts ( Arial )
* * Size of small letter is 5.7 pts ( Calibri ) , 5.6 pts ( Times ) , 6.5 pts ( Arial )
* * Mean line spacing = 1.15 - 1.22 ( ~ 1.16 )
* * Mean cap height = 0.68
* * Mean small letter height = 0.49
* *
*/
static void bmpregion_trim_margins ( BMPREGION * region , int * colcount0 ,
int * rowcount0 , int flags )
{
int i , j , n ; /* ,r1,r2,dr1,dr2,dr,vtrim,vspace; */
int * colcount , * rowcount ;
static char * funcname = " bmpregion_trim_margins " ;
/* To detect a hyphen, we need to trim and calc text base row */
if ( flags & 32 )
flags | = 0x1f ;
if ( colcount0 = = NULL )
willus_dmem_alloc_warn ( 10 , ( void * * ) & colcount ,
sizeof ( int ) * ( region - > c2 + 1 ) , funcname , 10 ) ;
else
colcount = colcount0 ;
if ( rowcount0 = = NULL )
willus_dmem_alloc_warn ( 11 , ( void * * ) & rowcount ,
sizeof ( int ) * ( region - > r2 + 1 ) , funcname , 10 ) ;
else
rowcount = rowcount0 ;
n = region - > c2 - region - > c1 + 1 ;
/*
printf ( " Trim: reg=(%d,%d) - (%d,%d) \n " , region - > c1 , region - > r1 , region - > c2 , region - > r2 ) ;
if ( region - > c2 + 1 > cca | | region - > r2 + 1 > rca )
{
printf ( " A ha 0! \n " ) ;
exit ( 10 ) ;
}
*/
memset ( colcount , 0 , ( region - > c2 + 1 ) * sizeof ( int ) ) ;
memset ( rowcount , 0 , ( region - > r2 + 1 ) * sizeof ( int ) ) ;
for ( j = region - > r1 ; j < = region - > r2 ; j + + ) {
unsigned char * p ;
p = bmp_rowptr_from_top ( region - > bmp8 , j ) + region - > c1 ;
for ( i = 0 ; i < n ; i + + , p + + )
if ( p [ 0 ] < region - > bgcolor ) {
rowcount [ j ] + + ;
colcount [ i + region - > c1 ] + + ;
}
}
/*
* * Trim excess margins
*/
if ( flags & 1 )
trim_to ( colcount , & region - > c1 , region - > c2 ,
src_left_to_right ? 2.0 : 4.0 ) ;
if ( flags & 2 )
trim_to ( colcount , & region - > c2 , region - > c1 ,
src_left_to_right ? 4.0 : 2.0 ) ;
if ( colcount0 = = NULL )
willus_dmem_free ( 10 , ( double * * ) & colcount , funcname ) ;
if ( flags & 4 )
trim_to ( rowcount , & region - > r1 , region - > r2 , 4.0 ) ;
if ( flags & 8 )
trim_to ( rowcount , & region - > r2 , region - > r1 , 4.0 ) ;
if ( flags & 16 ) {
int maxcount , mc2 , h2 ;
double f ;
maxcount = 0 ;
for ( i = region - > r1 ; i < = region - > r2 ; i + + )
if ( rowcount [ i ] > maxcount )
maxcount = rowcount [ i ] ;
mc2 = maxcount / 2 ;
for ( i = region - > r2 ; i > = region - > r1 ; i - - )
if ( rowcount [ i ] > mc2 )
break ;
region - > rowbase = i ;
for ( i = region - > r1 ; i < = region - > r2 ; i + + )
if ( rowcount [ i ] > mc2 )
break ;
region - > h5050 = region - > lcheight = region - > rowbase - i + 1 ;
mc2 = maxcount / 20 ;
for ( i = region - > r1 ; i < = region - > r2 ; i + + )
if ( rowcount [ i ] > mc2 )
break ;
region - > capheight = region - > rowbase - i + 1 ;
/*
* * Sanity check capheight and lcheight
*/
h2 = height2_calc ( & rowcount [ region - > r1 ] , region - > r2 - region - > r1 + 1 ) ;
# if (WILLUSDEBUGX & 8)
if ( region - > c2 - region - > c1 > 1500 )
printf ( " reg %d x %d (%d,%d) - (%d,%d) h2=%d ch/h2=%g \n " , region - > c2 - region - > c1 + 1 , region - > r2 - region - > r1 + 1 , region - > c1 , region - > r1 , region - > c2 , region - > r2 , h2 , ( double ) region - > capheight / h2 ) ;
# endif
if ( region - > capheight < h2 * 0.75 )
region - > capheight = h2 ;
f = ( double ) region - > lcheight / region - > capheight ;
if ( f < 0.55 )
region - > lcheight = ( int ) ( 0.72 * region - > capheight + .5 ) ;
else if ( f > 0.85 )
region - > lcheight = ( int ) ( 0.72 * region - > capheight + .5 ) ;
# if (WILLUSDEBUGX & 8)
if ( region - > c2 - region - > c1 > 1500 )
printf ( " lcheight final = %d \n " , region - > lcheight ) ;
# endif
# if (WILLUSDEBUGX & 10)
if ( region - > c2 - region - > c1 > 1500 & & region - > r2 - region - > r1 < 100 )
{
static int append = 0 ;
FILE * f ;
int i ;
f = fopen ( " textrows.ep " , append = = 0 ? " w " : " a " ) ;
append = 1 ;
for ( i = region - > r1 ; i < = region - > r2 ; i + + )
fprintf ( f , " %d %g \n " , region - > rowbase - i , ( double ) rowcount [ i ] / maxcount ) ;
fprintf ( f , " //nc \n " ) ;
fclose ( f ) ;
}
# endif
} else {
region - > h5050 = region - > r2 - region - > r1 + 1 ;
region - > capheight = 0.68 * ( region - > r2 - region - > r1 + 1 ) ;
region - > lcheight = 0.5 * ( region - > r2 - region - > r1 + 1 ) ;
region - > rowbase = region - > r2 ;
}
# if (WILLUSDEBUGX & 2)
printf ( " trim: \n reg->c1=%d, reg->c2=%d \n " , region - > c1 , region - > c2 ) ;
printf ( " reg->r1=%d, reg->r2=%d, reg->rowbase=%d \n \n " , region - > r1 , region - > r2 , region - > rowbase ) ;
# endif
if ( rowcount0 = = NULL )
willus_dmem_free ( 11 , ( double * * ) & rowcount , funcname ) ;
}
/*
* * Does region end in a hyphen ? If so , fill in HYPHENINFO structure .
*/
static void bmpregion_hyphen_detect ( BMPREGION * region )
{
int i , j ; /* ,r1,r2,dr1,dr2,dr,vtrim,vspace; */
int width ;
int * r0 , * r1 , * r2 , * r3 ;
int rmin , rmax , rowbytes , nrmid , rsum ;
int cstart , cend , cdir ;
unsigned char * p ;
static char * funcname = " bmpregion_hyphen_detect " ;
# if (WILLUSDEBUGX & 16)
static int count = 0 ;
char pngfile [ 256 ] ;
FILE * out ;
count + + ;
printf ( " @bmpregion_hyphen_detect count=%d \n " , count ) ;
sprintf ( pngfile , " word%04d.png " , count ) ;
bmpregion_write ( region , pngfile ) ;
sprintf ( pngfile , " word%04d.txt " , count ) ;
out = fopen ( pngfile , " w " ) ;
fprintf ( out , " c1=%d, c2=%d, r1=%d, r2=%d \n " , region - > c1 , region - > c2 , region - > r1 , region - > r2 ) ;
fprintf ( out , " lcheight=%d \n " , region - > lcheight ) ;
# endif
region - > hyphen . ch = - 1 ;
region - > hyphen . c2 = - 1 ;
if ( ! k2_hyphen_detect )
return ;
width = region - > c2 - region - > c1 + 1 ;
if ( width < 2 )
return ;
willus_dmem_alloc_warn ( 27 , ( void * * ) & r0 , sizeof ( int ) * 4 * width , funcname ,
10 ) ;
r1 = & r0 [ width ] ;
r2 = & r1 [ width ] ;
r3 = & r2 [ width ] ;
for ( i = 0 ; i < width ; i + + )
r0 [ i ] = r1 [ i ] = r2 [ i ] = r3 [ i ] = - 1 ;
rmin = region - > rowbase - region - > capheight - region - > lcheight * .04 ;
if ( rmin < region - > r1 )
rmin = region - > r1 ;
rmax = region - > rowbase + region - > lcheight * .04 ;
if ( rmax > region - > r2 )
rmax = region - > r2 ;
rowbytes = bmp_bytewidth ( region - > bmp8 ) ;
p = bmp_rowptr_from_top ( region - > bmp8 , 0 ) ;
nrmid = rsum = 0 ;
if ( src_left_to_right ) {
cstart = region - > c2 ;
cend = region - > c1 - 1 ;
cdir = - 1 ;
} else {
cstart = region - > c1 ;
cend = region - > c2 + 1 ;
cdir = 1 ;
}
# if (WILLUSDEBUGX & 16)
fprintf ( out , " j r0 r1 r2 r3 \n " ) ;
# endif
for ( j = cstart ; j ! = cend ; j + = cdir ) {
int r , rmid , dr , drmax ;
// printf("j=%d\n",j);
rmid = ( rmin + rmax ) / 2 ;
// printf(" rmid=%d\n",rmid);
drmax = region - > r2 + 1 - rmid > rmid - region - > r1 + 1 ?
region - > r2 + 1 - rmid : rmid - region - > r1 + 1 ;
/* Find dark region closest to center line */
for ( dr = 0 ; dr < drmax ; dr + + ) {
if ( rmid + dr < = region - > r2
& & p [ ( rmid + dr ) * rowbytes + j ] < region - > bgcolor )
break ;
if ( rmid - dr > = region - > r1
& & p [ ( rmid - dr ) * rowbytes + j ] < region - > bgcolor ) {
dr = - dr ;
break ;
}
}
# if (WILLUSDEBUGX & 16)
fprintf ( out , " dr=%d/%d, rmid+dr=%d, rmin=%d, rmax=%d, nrmid=%d \n " , dr , drmax , rmid + dr , rmin , rmax , nrmid ) ;
# endif
/* No dark detected or mark is outside hyphen region? */
/* Termination criterion #1 */
if ( dr > = drmax
| | ( nrmid > 2 & & ( double ) nrmid / region - > lcheight > .1
& & ( rmid + dr < rmin | | rmid + dr > rmax ) ) ) {
if ( region - > hyphen . ch > = 0 & & dr > = drmax )
continue ;
if ( nrmid > 2 & & ( double ) nrmid / region - > lcheight > .35 ) {
region - > hyphen . ch = j - cdir ;
region - > hyphen . r1 = rmin ;
region - > hyphen . r2 = rmax ;
}
if ( dr < drmax ) {
region - > hyphen . c2 = j ;
break ;
}
continue ;
}
if ( region - > hyphen . ch > = 0 ) {
region - > hyphen . c2 = j ;
break ;
}
nrmid + + ;
rmid + = dr ;
/* Dark spot is outside expected hyphen area */
/*
if ( rmid < rmin | | rmid > rmax )
{
if ( nrmid > 0 )
break ;
continue ;
}
*/
for ( r = rmid ; r > = region - > r1 ; r - - )
if ( p [ r * rowbytes + j ] > = region - > bgcolor )
break ;
r1 [ j - region - > c1 ] = r + 1 ;
r0 [ j - region - > c1 ] = - 1 ;
if ( r > = region - > r1 ) {
for ( ; r > = region - > r1 ; r - - )
if ( p [ r * rowbytes + j ] < region - > bgcolor )
break ;
if ( r > = region - > r1 )
r0 [ j - region - > c1 ] = r ;
}
for ( r = rmid ; r < = region - > r2 ; r + + )
if ( p [ r * rowbytes + j ] > = region - > bgcolor )
break ;
r2 [ j - region - > c1 ] = r - 1 ;
r3 [ j - region - > c1 ] = - 1 ;
if ( r < = region - > r2 ) {
for ( ; r < = region - > r2 ; r + + )
if ( p [ r * rowbytes + j ] < region - > bgcolor )
break ;
if ( r < = region - > r2 )
r3 [ j - region - > c1 ] = r ;
}
# if (WILLUSDEBUGX & 16)
fprintf ( out , " %4d %4d %4d %4d %4d \n " , j , r0 [ j - region - > c1 ] , r1 [ j - region - > c1 ] , r2 [ j - region - > c1 ] , r3 [ j - region - > c1 ] ) ;
# endif
if ( region - > hyphen . c2 < 0
& & ( r0 [ j - region - > c1 ] > = 0 | | r3 [ j - region - > c1 ] > = 0 ) )
region - > hyphen . c2 = j ;
/* Termination criterion #2 */
if ( nrmid > 2 & & ( double ) nrmid / region - > lcheight > .35
& & ( r1 [ j - region - > c1 ] > rmax | | r2 [ j - region - > c1 ] < rmin ) ) {
region - > hyphen . ch = j - cdir ;
region - > hyphen . r1 = rmin ;
region - > hyphen . r2 = rmax ;
if ( region - > hyphen . c2 < 0 )
region - > hyphen . c2 = j ;
break ;
}
// rc=(r1[j-region->c1]+r2[j-region->c1])/2;
/* DQ possible hyphen if r1/r2 out of range */
if ( nrmid > 1 ) {
/* Too far away from last values? */
if ( ( double ) ( rmin - r1 [ j - region - > c1 ] ) / region - > lcheight > .1
| | ( double ) ( r2 [ j - region - > c1 ] - rmax ) / region - > lcheight
> .1 )
break ;
if ( ( double ) nrmid / region - > lcheight > .1 & & nrmid > 1 ) {
if ( ( double ) fabs ( rmin - r1 [ j - region - > c1 ] ) / region - > lcheight
> .1
| | ( double ) ( rmax - r2 [ j - region - > c1 ] )
/ region - > lcheight > .1 )
break ;
}
}
if ( nrmid = = 1 | | r1 [ j - region - > c1 ] < rmin )
rmin = r1 [ j - region - > c1 ] ;
if ( nrmid = = 1 | | r2 [ j - region - > c1 ] > rmax )
rmax = r2 [ j - region - > c1 ] ;
if ( ( double ) nrmid / region - > lcheight > .1 & & nrmid > 1 ) {
double rmean ;
/* Can't be too thick */
if ( ( double ) ( rmax - rmin ) / region - > lcheight > .55
| | ( double ) ( rmax - rmin ) / region - > lcheight < .08 )
break ;
/* Must be reasonably well centered above baseline */
rmean = ( double ) ( rmax + rmin ) / 2 ;
if ( ( double ) ( region - > rowbase - rmean ) / region - > lcheight < 0.35
| | ( double ) ( region - > rowbase - rmean ) / region - > lcheight
> 0.85 )
break ;
if ( ( double ) ( region - > rowbase - rmax ) / region - > lcheight < 0.2
| | ( double ) ( region - > rowbase - rmin ) / region - > lcheight
> 0.92 )
break ;
}
}
# if (WILLUSDEBUGX & 16)
fprintf ( out , " ch=%d, c2=%d, r1=%d, r2=%d \n " , region - > hyphen . ch , region - > hyphen . c2 , region - > hyphen . r1 , region - > hyphen . r2 ) ;
fclose ( out ) ;
# endif
/* More sanity checks--better to miss a hyphen than falsely detect it. */
if ( region - > hyphen . ch > = 0 ) {
double ar ;
/* If it's only a hyphen, then it's probably actually a dash--don't detect it. */
if ( region - > hyphen . c2 < 0 )
region - > hyphen . ch = - 1 ;
/* Check aspect ratio */
ar = ( double ) ( region - > hyphen . r2 - region - > hyphen . r1 ) / nrmid ;
if ( ar < 0.08 | | ar > 0.75 )
region - > hyphen . ch = - 1 ;
}
willus_dmem_free ( 27 , ( double * * ) & r0 , funcname ) ;
# if (WILLUSDEBUGX & 16)
if ( region - > hyphen . ch > = 0 )
printf ( " \n \n GOT HYPHEN. \n \n " ) ;
printf ( " Exiting bmpregion_hyphen_detect \n " ) ;
# endif
}
# if (defined(WILLUSDEBUGX) || defined(WILLUSDEBUG))
static void bmpregion_write ( BMPREGION * region , char * filename )
{
int i , bpp ;
WILLUSBITMAP * bmp , _bmp ;
bmp = & _bmp ;
bmp_init ( bmp ) ;
bmp - > width = region - > c2 - region - > c1 + 1 ;
bmp - > height = region - > r2 - region - > r1 + 1 ;
bmp - > bpp = region - > bmp - > bpp ;
bpp = bmp - > bpp = = 8 ? 1 : 3 ;
bmp_alloc ( bmp ) ;
for ( i = 0 ; i < 256 ; i + + )
bmp - > red [ i ] = bmp - > green [ i ] = bmp - > blue [ i ] = i ;
for ( i = 0 ; i < bmp - > height ; i + + )
{
unsigned char * s , * d ;
s = bmp_rowptr_from_top ( region - > bmp , region - > r1 + i ) + region - > c1 * bpp ;
d = bmp_rowptr_from_top ( bmp , i ) ;
memcpy ( d , s , bmp - > width * bpp ) ;
}
bmp_write ( bmp , filename , stdout , 97 ) ;
bmp_free ( bmp ) ;
}
# endif
# if (WILLUSDEBUGX & 6)
static void breakinfo_echo ( BREAKINFO * breakinfo )
{
int i ;
printf ( " @breakinfo_echo... \n " ) ;
for ( i = 0 ; i < breakinfo - > n ; i + + )
printf ( " %2d. r1=%4d, rowbase=%4d, r2=%4d, c1=%4d, c2=%4d \n " ,
i + 1 , breakinfo - > textrow [ i ] . r1 ,
breakinfo - > textrow [ i ] . rowbase ,
breakinfo - > textrow [ i ] . r2 ,
breakinfo - > textrow [ i ] . c1 ,
breakinfo - > textrow [ i ] . c2 ) ;
}
# endif
/*
* * Calculate weighted height of a rectangular region .
* * This weighted height is intended to be close to the height of
* * a capital letter , or the height of the majority of the region .
* *
*/
static int height2_calc ( int * rc , int n )
{
int i , thresh , i1 , h2 ;
int * c ;
static char * funcname = " height2_calc " ;
# if (WILLUSDEBUGX & 8)
int cmax ;
# endif
if ( n < = 0 )
return ( 1 ) ;
willus_dmem_alloc_warn ( 12 , ( void * * ) & c , sizeof ( int ) * n , funcname , 10 ) ;
memcpy ( c , rc , n * sizeof ( int ) ) ;
sorti ( c , n ) ;
# if (WILLUSDEBUGX & 8)
cmax = c [ n - 1 ] ;
# endif
for ( i = 0 ; i < n - 1 & & c [ i ] = = 0 ; i + + )
;
thresh = c [ ( i + n ) / 3 ] ;
willus_dmem_free ( 12 , ( double * * ) & c , funcname ) ;
for ( i = 0 ; i < n - 1 ; i + + )
if ( rc [ i ] > = thresh )
break ;
i1 = i ;
for ( i = n - 1 ; i > i1 ; i - - )
if ( rc [ i ] > = thresh )
break ;
# if (WILLUSDEBUGX & 8)
// printf("thresh = %g, i1=%d, i2=%d\n",(double)thresh/cmax,i1,i);
# endif
h2 = i - i1 + 1 ; /* Guaranteed to be >=1 */
return ( h2 ) ;
}
static void trim_to ( int * count , int * i1 , int i2 , double gaplen )
{
int del , dcount , igaplen , clevel , dlevel , defect_start , last_defect ;
igaplen = ( int ) ( gaplen * src_dpi / 72. ) ;
if ( igaplen < 1 )
igaplen = 1 ;
/* clevel=(int)(defect_size_pts*src_dpi/72./3.); */
clevel = 0 ;
dlevel = ( int ) ( pow ( defect_size_pts * src_dpi / 72. , 2. ) * PI / 4. + .5 ) ;
del = i2 > ( * i1 ) ? 1 : - 1 ;
defect_start = - 1 ;
last_defect = - 1 ;
dcount = 0 ;
for ( ; ( * i1 ) ! = i2 ; ( * i1 ) = ( * i1 ) + del ) {
if ( count [ ( * i1 ) ] < = clevel ) {
dcount = 0 ; /* Reset defect size */
continue ;
}
/* Mark found */
if ( dcount = = 0 ) {
if ( defect_start > = 0 )
last_defect = defect_start ;
defect_start = ( * i1 ) ;
}
dcount + = count [ ( * i1 ) ] ;
if ( dcount > = dlevel ) {
if ( last_defect > = 0 & & abs ( defect_start - last_defect ) < = igaplen )
( * i1 ) = last_defect ;
else
( * i1 ) = defect_start ;
return ;
}
}
if ( defect_start < 0 )
return ;
if ( last_defect < 0 ) {
( * i1 ) = defect_start ;
return ;
}
if ( abs ( defect_start - last_defect ) < = igaplen )
( * i1 ) = last_defect ;
else
( * i1 ) = defect_start ;
}
/*
* * A region that needs its line spacing and justification analyzed .
* *
* * The region may be wider than the max desirable region width .
* *
* * Input : breakinfo should be valid row - break information for the region .
* *
* * Calls bmpregion_one_row_wrap_and_add ( ) for each text row from the
* * breakinfo structure that is within the region .
* *
*/
static void bmpregion_analyze_justification_and_line_spacing ( BMPREGION * region ,
BREAKINFO * breakinfo , MASTERINFO * masterinfo , int * colcount ,
int * rowcount , PAGEINFO * pageinfo , int allow_text_wrapping ,
double force_scale )
{
int i , i1 , i2 , ntr , mean_row_gap , maxgap , line_spacing , nls , nch ;
BMPREGION * newregion , _newregion ;
double * id , * c1 , * c2 , * ch , * lch , * ls ;
int * just , * indented , * short_line ;
double capheight , lcheight , fontsize ;
int textheight , ragged_right , src_line_spacing ;
static char * funcname = " bmpregion_analyze_justification_and_line_spacing " ;
# if (WILLUSDEBUGX & 1)
printf ( " @bmpregion_analyze_justification_and_line_spacing " ) ;
printf ( " (%d,%d) - (%d,%d) \n " , region - > c1 , region - > r1 , region - > c2 , region - > r2 ) ;
printf ( " centering = %d \n " , breakinfo - > centered ) ;
# endif
# if (WILLUSDEBUGX & 2)
breakinfo_echo ( breakinfo ) ;
# endif
/* Locate the vertical part indices in the breakinfo structure */
newregion = & _newregion ;
breakinfo_sort_by_row_position ( breakinfo ) ;
for ( i = 0 ; i < breakinfo - > n ; i + + ) {
TEXTROW * textrow ;
textrow = & breakinfo - > textrow [ i ] ;
if ( ( textrow - > r1 + textrow - > r2 ) / 2 > = region - > r1 )
break ;
}
if ( i > = breakinfo - > n )
return ;
i1 = i ;
for ( ; i < breakinfo - > n ; i + + ) {
TEXTROW * textrow ;
textrow = & breakinfo - > textrow [ i ] ;
if ( ( textrow - > r1 + textrow - > r2 ) / 2 > region - > r2 )
break ;
}
i2 = i - 1 ;
if ( i2 < i1 )
return ;
ntr = i2 - i1 + 1 ;
# if (WILLUSDEBUGX & 1)
printf ( " i1=%d, i2=%d, ntr=%d \n " , i1 , i2 , ntr ) ;
# endif
willus_dmem_alloc_warn ( 13 , ( void * * ) & c1 , sizeof ( double ) * 6 * ntr ,
funcname , 10 ) ;
willus_dmem_alloc_warn ( 14 , ( void * * ) & just , sizeof ( int ) * 3 * ntr , funcname ,
10 ) ;
c2 = & c1 [ ntr ] ;
ch = & c2 [ ntr ] ;
lch = & ch [ ntr ] ;
ls = & lch [ ntr ] ;
id = & ls [ ntr ] ;
indented = & just [ ntr ] ;
short_line = & indented [ ntr ] ;
for ( i = 0 ; i < ntr ; i + + )
id [ i ] = i ;
/* Find baselines / font size */
capheight = lcheight = 0. ;
maxgap = - 1 ;
for ( nch = nls = 0 , i = i1 ; i < = i2 ; i + + ) {
TEXTROW * textrow ;
double ar , rh ;
int marking_flags ;
textrow = & breakinfo - > textrow [ i ] ;
c1 [ i - i1 ] = ( double ) textrow - > c1 ;
c2 [ i - i1 ] = ( double ) textrow - > c2 ;
if ( i < i2 & & maxgap < textrow - > gap ) {
maxgap = textrow - > gap ;
if ( maxgap < 2 )
maxgap = 2 ;
}
if ( textrow - > c2 < textrow - > c1 )
ar = 100. ;
else
ar = ( double ) ( textrow - > r2 - textrow - > r1 + 1 )
/ ( double ) ( textrow - > c2 - textrow - > c1 + 1 ) ;
rh = ( double ) ( textrow - > r2 - textrow - > r1 + 1 ) / src_dpi ;
if ( i < i2 & & ar < = no_wrap_ar_limit
& & rh < = no_wrap_height_limit_inches )
ls [ nls + + ] = breakinfo - > textrow [ i + 1 ] . r1 - textrow - > r1 ;
if ( ar < = no_wrap_ar_limit & & rh < = no_wrap_height_limit_inches ) {
ch [ nch ] = textrow - > capheight ;
lch [ nch ] = textrow - > lcheight ;
nch + + ;
}
/* Mark region w/gray, mark rowbase also */
marking_flags = ( i = = i1 ? 0 : 1 ) | ( i = = i2 ? 0 : 2 ) ;
if ( i < i2 | | textrow - > r2 - textrow - > rowbase > 1 )
marking_flags | = 0x10 ;
( * newregion ) = ( * region ) ;
newregion - > r1 = textrow - > r1 ;
newregion - > r2 = textrow - > r2 ;
newregion - > c1 = textrow - > c1 ;
newregion - > c2 = textrow - > c2 ;
newregion - > rowbase = textrow - > rowbase ;
mark_source_page ( newregion , 5 , marking_flags ) ;
# if (WILLUSDEBUGX & 1)
printf ( " Row %2d: (%4d,%4d) - (%4d,%4d) rowbase=%4d, lch=%d, h5050=%d, rh=%d \n " , i - i1 + 1 , textrow - > c1 , textrow - > r1 , textrow - > c2 , textrow - > r2 , textrow - > rowbase , textrow - > lcheight , textrow - > h5050 , textrow - > rowheight ) ;
# endif
}
wrapbmp_set_maxgap ( maxgap ) ;
if ( nch < 1 )
capheight = lcheight = 2 ; // Err on the side of too small
else {
capheight = median_val ( ch , nch ) ;
lcheight = median_val ( lch , nch ) ;
}
// printf("capheight = %g, lcheight = %g\n",capheight,lcheight);
bmpregion_is_centered ( region , breakinfo , i1 , i2 , & textheight ) ;
/*
* * For 12 pt font :
* * Single spacing is 14.66 pts ( Calibri ) , 13.82 pts ( Times ) , 13.81 pts ( Arial )
* * Size of cap letter is 7.7 pts ( Calibri ) , 8.1 pts ( Times ) , 8.7 pts ( Arial )
* * Size of small letter is 5.7 pts ( Calibri ) , 5.6 pts ( Times ) , 6.5 pts ( Arial )
* * Mean line spacing = 1.15 - 1.22 ( ~ 1.16 )
* * Mean cap height = 0.68
* * Mean small letter height = 0.49
*/
fontsize = ( capheight + lcheight ) / 1.17 ;
// printf("font size = %g pts.\n",(fontsize/src_dpi)*72.);
/*
* * Set line spacing for this region
*/
if ( nls > 0 )
src_line_spacing = median_val ( ls , nls ) ;
else
src_line_spacing = fontsize * 1.2 ;
if ( vertical_line_spacing < 0
& & src_line_spacing
< = fabs ( vertical_line_spacing ) * fontsize * 1.16 )
line_spacing = src_line_spacing ;
else
line_spacing = fabs ( vertical_line_spacing ) * fontsize * 1.16 ;
# if (WILLUSDEBUGX & 1)
printf ( " font size = %.2f pts = %d pixels \n " , ( fontsize / src_dpi ) * 72. , ( int ) ( fontsize + .5 ) ) ;
printf ( " src_line_spacing = %d, line_spacing = %d \n " , src_line_spacing , line_spacing ) ;
# endif
/*
if ( ntr = = 1 )
rheight = ( int ) ( ( breakinfo - > textrow [ i1 ] . r2 - breakinfo - > textrow [ i1 ] . r1 ) * 1.25 + .5 ) ;
else
rheight = ( int ) ( ( double ) ( breakinfo - > textrow [ i2 ] . rowbase - breakinfo - > textrow [ i1 ] . rowbase ) / ( ntr - 1 ) + .5 ) ;
*/
mean_row_gap = line_spacing - textheight ;
if ( mean_row_gap < = 1 )
mean_row_gap = 1 ;
/* Try to figure out if we have a ragged right edge */
if ( ntr < 3 )
ragged_right = 1 ;
else {
int flushcount ;
if ( src_left_to_right ) {
for ( flushcount = i = 0 ; i < ntr ; i + + ) {
# if (WILLUSDEBUGX & 1)
printf ( " flush_factors[%d] = %g (<.5), %g in (<.1) \n " ,
i , ( double ) ( region - > c2 - c2 [ i ] ) / textheight , ( double ) ( region - > c2 - c2 [ i ] ) / src_dpi ) ;
# endif
if ( ( double ) ( region - > c2 - c2 [ i ] ) / textheight < 0.5
& & ( double ) ( region - > c2 - c2 [ i ] ) / src_dpi < 0.1 )
flushcount + + ;
}
} else {
for ( flushcount = i = 0 ; i < ntr ; i + + ) {
# if (WILLUSDEBUGX & 1)
printf ( " flush_factors[%d] = %g (<.5), %g in (<.1) \n " ,
i , ( double ) ( c1 [ i ] - region - > c1 ) / textheight , ( double ) ( c1 [ i ] - region - > c1 ) / src_dpi ) ;
# endif
if ( ( double ) ( c1 [ i ] - region - > c1 ) / textheight < 0.5
& & ( double ) ( c1 [ i ] - region - > c1 ) / src_dpi < 0.1 )
flushcount + + ;
}
}
ragged_right = ( flushcount < = ntr / 2 ) ;
/*
if ( src_left_to_right )
{
sortxyd ( c2 , id , ntr ) ;
del = region - > c2 - c2 [ ntr - 1 - ntr / 3 ] ;
sortxyd ( id , c2 , ntr ) ;
}
else
{
sortxyd ( c1 , id , ntr ) ;
del = c1 [ ntr / 3 ] - region - > c1 ;
sortxyd ( id , c1 , ntr ) ;
}
del / = textheight ;
printf ( " del=%g \n " , del ) ;
ragged_right = ( del > 0.5 ) ;
*/
}
# if (WILLUSDEBUGX & 1)
printf ( " ragged_right=%d \n " , ragged_right ) ;
# endif
/* Store justification and other info line by line */
for ( i = i1 ; i < = i2 ; i + + ) {
double indent1 , del ;
double i1f , ilfi , i2f , ilf , ifmin , dif ;
int centered ;
TEXTROW * textrow ;
textrow = & breakinfo - > textrow [ i ] ;
i1f = ( double ) ( c1 [ i - i1 ] - region - > c1 )
/ ( region - > c2 - region - > c1 + 1 ) ;
i2f = ( double ) ( region - > c2 - c2 [ i - i1 ] )
/ ( region - > c2 - region - > c1 + 1 ) ;
ilf = src_left_to_right ? i1f : i2f ;
ilfi = ilf * ( region - > c2 - region - > c1 + 1 ) / src_dpi ; /* Indent in inches */
ifmin = i1f < i2f ? i1f : i2f ;
dif = fabs ( i1f - i2f ) ;
if ( ifmin < .01 )
ifmin = 0.01 ;
if ( src_left_to_right )
indent1 = ( double ) ( c1 [ i - i1 ] - region - > c1 ) / textheight ;
else
indent1 = ( double ) ( region - > c2 - c2 [ i - i1 ] ) / textheight ;
// printf(" row %2d: indent1=%g\n",i-i1,indent1);
if ( ! breakinfo - > centered ) {
indented [ i - i1 ] = ( indent1 > 0.5 & & ilfi < 1.2 & & ilf < .25 ) ;
centered =
( ! indented [ i - i1 ] & & indent1 > 1.0 & & dif / ifmin < 0.5 ) ;
} else {
centered = ( dif < 0.1 | | dif / ifmin < 0.5 ) ;
indented [ i - i1 ] = ( indent1 > 0.5 & & ilfi < 1.2 & & ilf < .25
& & ! centered ) ;
}
# if (WILLUSDEBUGX & 1)
printf ( " Indent %d: %d. indent1=%g, ilf=%g, centered=%d \n " , i - i1 + 1 , indented [ i - i1 ] , indent1 , ilf , centered ) ;
printf ( " indent1=%g, i1f=%g, i2f=%g \n " , indent1 , i1f , i2f ) ;
# endif
if ( centered )
just [ i - i1 ] = 4 ;
else {
/*
* * The .01 favors left justification over right justification in
* * close cases .
*/
if ( src_left_to_right )
just [ i - i1 ] = indented [ i - i1 ] | | ( i1f < i2f + .01 ) ? 0 : 8 ;
else
just [ i - i1 ] = indented [ i - i1 ] | | ( i2f < i1f + .01 ) ? 8 : 0 ;
}
if ( src_left_to_right )
del = ( double ) ( region - > c2 - textrow - > c2 ) ;
else
del = ( double ) ( textrow - > c1 - region - > c1 ) ;
/* Should we keep wrapping after this line? */
if ( ! ragged_right )
short_line [ i - i1 ] = ( del / textheight > 0.5 ) ;
else
short_line [ i - i1 ] = ( del / ( region - > c2 - region - > c1 ) > 0.25 ) ;
/* If this row is a bigger/smaller row (font) than the next row, don't wrap. */
if ( ! short_line [ i - i1 ] & & i < i2 ) {
TEXTROW * t1 ;
t1 = & breakinfo - > textrow [ i + 1 ] ;
if ( ( textrow - > h5050 > t1 - > h5050 * 1.5
| | textrow - > h5050 * 1.5 < t1 - > h5050 )
& & ( i = = 0
| | ( i > 0
& & ( textrow - > rowheight > t1 - > rowheight * 1.5
| | textrow - > rowheight * 1.5
< t1 - > rowheight ) ) ) )
short_line [ i - i1 ] = 1 ;
}
if ( ! ragged_right )
just [ i - i1 ] | = 0x40 ;
# if (WILLUSDEBUGX & 1)
printf ( " just[%d]=0x%02X, shortline[%d]=%d \n " , i - i1 , just [ i - i1 ] , i - i1 , short_line [ i - i1 ] ) ;
printf ( " textrow->c2=%d, region->c2=%d, del=%g, textheight=%d \n " , textrow - > c2 , region - > c2 , del , textheight ) ;
# endif
/* If short line, it should still be fully justified if it is wrapped. */
/*
if ( short_line [ i - i1 ] )
just [ i - i1 ] = ( just [ i - i1 ] & 0xf ) | 0x60 ;
*/
}
/*
{
double mean1 , mean2 , stdev1 , stdev2 ;
array_mean ( c1 , ntr , & mean1 , & stdev1 ) ;
array_mean ( c2 , ntr , & mean2 , & stdev2 ) ;
printf ( " Mean c1, c2 = %g, %g; stddevs = %g, %g \n " , mean1 , mean2 , stdev1 , stdev2 ) ;
printf ( " textheight = %d, line_spacing = %d \n " , textheight , line_spacing ) ;
}
*/
for ( i = i1 ; i < = i2 ; i + + ) {
TEXTROW * textrow ;
int justflags , trimflags , centered , marking_flags , gap ;
# if (WILLUSDEBUGX & 1)
aprintf ( " Row " ANSI_YELLOW " %d of %d " ANSI_NORMAL " (wrap=%d) \n " , i - i1 + 1 , i2 - i1 + 1 , allow_text_wrapping ) ;
# endif
textrow = & breakinfo - > textrow [ i ] ;
( * newregion ) = ( * region ) ;
newregion - > r1 = textrow - > r1 ;
newregion - > r2 = textrow - > r2 ;
/* The |3 tells it to use the user settings for left/right/center */
justflags = just [ i - i1 ] | 0x3 ;
centered = ( ( justflags & 0xc ) = = 4 ) ;
# if (WILLUSDEBUGX & 1)
printf ( " justflags[%d]=0x%2X, centered=%d, indented=%d \n " , i - i1 , justflags , centered , indented [ i - i1 ] ) ;
# endif
if ( allow_text_wrapping ) {
/* If this line is indented or if the justification has changed, */
/* then start a new line. */
if ( centered | | indented [ i - i1 ]
| | ( i > i1
& & ( just [ i - i1 ] & 0xc ) ! = ( just [ i - i1 - 1 ] & 0xc ) ) ) {
# ifdef WILLUSDEBUG
printf ( " wrapflush4 \n " ) ;
# endif
wrapbmp_flush ( masterinfo , 0 , pageinfo , 1 ) ;
}
# ifdef WILLUSDEBUG
printf ( " c1=%d, c2=%d \n " , newregion - > c1 , newregion - > c2 ) ;
# endif
marking_flags = 0xc | ( i = = i1 ? 0 : 1 ) | ( i = = i2 ? 0 : 2 ) ;
bmpregion_one_row_wrap_and_add ( newregion , breakinfo , i , i1 , i2 ,
masterinfo , justflags , colcount , rowcount , pageinfo ,
line_spacing , mean_row_gap , textrow - > rowbase , marking_flags ,
indented [ i - i1 ] ) ;
if ( centered | | short_line [ i - i1 ] ) {
# ifdef WILLUSDEBUG
printf ( " wrapflush5 \n " ) ;
# endif
wrapbmp_flush ( masterinfo , 0 , pageinfo , 2 ) ;
}
continue ;
}
# ifdef WILLUSDEBUG
printf ( " wrapflush5a \n " ) ;
# endif
wrapbmp_flush ( masterinfo , 0 , pageinfo , 1 ) ;
/* If default justifications, ignore all analysis and just center it. */
if ( dst_justify < 0 & & dst_fulljustify < 0 ) {
newregion - > c1 = region - > c1 ;
newregion - > c2 = region - > c2 ;
justflags = 0xad ; /* Force centered region, no justification */
trimflags = 0x80 ;
} else
trimflags = 0 ;
/* No wrapping: text wrap, trim flags, vert breaks, fscale, just */
bmpregion_add ( newregion , breakinfo , masterinfo , 0 , trimflags , 0 ,
force_scale , justflags , 5 , colcount , rowcount , pageinfo , 0 ,
textrow - > r2 - textrow - > rowbase ) ;
if ( vertical_line_spacing < 0 ) {
int gap1 ;
gap1 = line_spacing - ( textrow - > r2 - textrow - > r1 + 1 ) ;
if ( i < i2 )
gap = textrow - > gap > gap1 ? gap1 : textrow - > gap ;
else {
gap = textrow - > rowheight
- ( textrow - > rowbase + last_rowbase_internal ) ;
if ( gap < mean_row_gap / 2. )
gap = mean_row_gap ;
}
} else {
gap = line_spacing - ( textrow - > r2 - textrow - > r1 + 1 ) ;
if ( gap < mean_row_gap / 2. )
gap = mean_row_gap ;
}
if ( i < i2 )
dst_add_gap_src_pixels ( " No-wrap line " , masterinfo , gap ) ;
else {
last_h5050_internal = textrow - > h5050 ;
beginning_gap_internal = gap ;
}
}
willus_dmem_free ( 14 , ( double * * ) & just , funcname ) ;
willus_dmem_free ( 13 , ( double * * ) & c1 , funcname ) ;
# ifdef WILLUSDEBUG
printf ( " Done wrap_and_add. \n " ) ;
# endif
}
static int bmpregion_is_centered ( BMPREGION * region , BREAKINFO * breakinfo ,
int i1 , int i2 , int * th )
{
int j , i , cc , n1 , ntr ;
int textheight ;
# if (WILLUSDEBUGX & 1)
printf ( " @bmpregion_is_centered: region=(%d,%d) - (%d,%d) \n " , region - > c1 , region - > r1 , region - > c2 , region - > r2 ) ;
printf ( " nrows = %d \n " , i2 - i1 + 1 ) ;
# endif
ntr = i2 - i1 + 1 ;
for ( j = 0 ; j < 3 ; j + + ) {
for ( n1 = textheight = 0 , i = i1 ; i < = i2 ; i + + ) {
TEXTROW * textrow ;
double ar , rh ;
textrow = & breakinfo - > textrow [ i ] ;
if ( textrow - > c2 < textrow - > c1 )
ar = 100. ;
else
ar = ( double ) ( textrow - > r2 - textrow - > r1 + 1 )
/ ( double ) ( textrow - > c2 - textrow - > c1 + 1 ) ;
rh = ( double ) ( textrow - > r2 - textrow - > r1 + 1 ) / src_dpi ;
if ( j = = 2 | | ( j > = 1 & & rh < = no_wrap_height_limit_inches )
| | ( j = = 0 & & rh < = no_wrap_height_limit_inches
& & ar < = no_wrap_ar_limit ) ) {
textheight + = textrow - > rowbase - textrow - > r1 + 1 ;
n1 + + ;
}
}
if ( n1 > 0 )
break ;
}
textheight = ( int ) ( ( double ) textheight / n1 + .5 ) ;
if ( th ! = NULL ) {
( * th ) = textheight ;
# if (WILLUSDEBUGX & 1)
printf ( " textheight assigned (%d) \n " , textheight ) ;
# endif
return ( breakinfo - > centered ) ;
}
/*
* * Does region appear to be centered ?
*/
for ( cc = 0 , i = i1 ; i < = i2 ; i + + ) {
double indent1 , indent2 ;
# if (WILLUSDEBUGX & 1)
printf ( " tr[%d].c1,c2 = %d, %d \n " , i , breakinfo - > textrow [ i ] . c1 , breakinfo - > textrow [ i ] . c2 ) ;
# endif
indent1 = ( double ) ( breakinfo - > textrow [ i ] . c1 - region - > c1 ) / textheight ;
indent2 = ( double ) ( region - > c2 - breakinfo - > textrow [ i ] . c2 ) / textheight ;
# if (WILLUSDEBUGX & 1)
printf ( " tr[%d].indent1,2 = %g, %g \n " , i , indent1 , indent2 ) ;
# endif
/* If only one line and it spans the entire region, call it centered */
/* Sometimes this won't be the right thing to to. */
if ( i1 = = i2 & & indent1 < .5 & & indent2 < .5 ) {
# if (WILLUSDEBUGX & 1)
printf ( " One line default to bigger region (%s). \n " , breakinfo - > centered ? " not centered " : " centered " ) ;
# endif
return ( 1 ) ;
}
if ( fabs ( indent1 - indent2 ) > 1.5 ) {
# if (WILLUSDEBUGX & 1)
printf ( " Region not centered. \n " ) ;
# endif
return ( 0 ) ;
}
if ( indent1 > 1.0 )
cc + + ;
}
# if (WILLUSDEBUGX & 1)
printf ( " Region centering: i=%d, i2=%d, cc=%d, ntr=%d \n " , i , i2 , cc , ntr ) ;
# endif
if ( cc > ntr / 2 ) {
# if (WILLUSDEBUGX & 1)
printf ( " Region is centered (enough obviously centered lines). \n " ) ;
# endif
return ( 1 ) ;
}
# if (WILLUSDEBUGX & 1)
printf ( " Not centered (not enough obviously centered lines). \n " ) ;
# endif
return ( 0 ) ;
}
/* array.c */
/*
* *
* * Compute mean and standard deviation
* *
*/
double array_mean ( double * a , int n , double * mean , double * stddev )
{
int i ;
double sum , avg , sum_sq ;
if ( n < 1 )
return ( 0. ) ;
for ( sum = sum_sq = i = 0 ; i < n ; i + + )
sum + = a [ i ] ;
avg = sum / n ;
if ( mean ! = NULL )
( * mean ) = avg ;
if ( stddev ! = NULL ) {
double sum_sq ;
for ( sum_sq = i = 0 ; i < n ; i + + )
sum_sq + = ( a [ i ] - avg ) * ( a [ i ] - avg ) ;
( * stddev ) = sqrt ( sum_sq / n ) ;
}
return ( avg ) ;
}
/*
* * CAUTION : This function re - orders the x [ ] array !
*/
static double median_val ( double * x , int n )
{
int i1 , n1 ;
if ( n < 4 )
return ( array_mean ( x , n , NULL , NULL ) ) ;
sortd ( x , n ) ;
if ( n = = 4 ) {
n1 = 2 ;
i1 = 1 ;
} else if ( n = = 5 ) {
n1 = 3 ;
i1 = 1 ;
} else {
n1 = n / 3 ;
i1 = ( n - n1 ) / 2 ;
}
return ( array_mean ( & x [ i1 ] , n1 , NULL , NULL ) ) ;
}
/*
* *
* * Searches the region for vertical break points and stores them into
* * the BREAKINFO structure .
* *
* * apsize_in = averaging aperture size in inches . Use - 1 for dynamic aperture .
* *
*/
static void bmpregion_find_vertical_breaks ( BMPREGION * region ,
BREAKINFO * breakinfo , int * colcount , int * rowcount , double apsize_in )
{
static char * funcname = " bmpregion_find_vertical_breaks " ;
int nr , i , brc , brcmin , dtrc , trc , aperture , aperturemax , figrow , labelrow ;
int ntr , rhmin_pix ;
BMPREGION * newregion , _newregion ;
int * rowthresh ;
double min_fig_height , max_fig_gap , max_label_height ;
min_fig_height = dst_min_figure_height_in ;
max_fig_gap = 0.16 ;
max_label_height = 0.5 ;
/* Trim region and populate colcount/rowcount arrays */
bmpregion_trim_margins ( region , colcount , rowcount , 0xf ) ;
newregion = & _newregion ;
( * newregion ) = ( * region ) ;
if ( debug )
printf ( " @bmpregion_find_vertical_breaks: (%d,%d) - (%d,%d) \n " ,
region - > c1 , region - > r1 , region - > c2 , region - > r2 ) ;
/*
* * brc = consecutive blank pixel rows
* * trc = consecutive non - blank pixel rows
* * dtrc = number of non blank pixel rows since last dump
*/
nr = region - > r2 - region - > r1 + 1 ;
willus_dmem_alloc_warn ( 15 , ( void * * ) & rowthresh , sizeof ( int ) * nr , funcname ,
10 ) ;
brcmin = max_vertical_gap_inches * src_dpi ;
aperturemax = ( int ) ( src_dpi / 72. + .5 ) ;
if ( aperturemax < 2 )
aperturemax = 2 ;
aperture = ( int ) ( src_dpi * apsize_in + .5 ) ;
/*
for ( i = region - > r1 ; i < = region - > r2 ; i + + )
printf ( " rowcount[%d]=%d \n " , i , rowcount [ i ] ) ;
*/
breakinfo - > rhmean_pixels = 0 ; // Mean text row height
ntr = 0 ; // Number of text rows
/* Fill rowthresh[] array */
for ( dtrc = 0 , i = region - > r1 ; i < = region - > r2 ; i + + ) {
int ii , i1 , i2 , sum , pt ;
if ( apsize_in < 0. ) {
aperture = ( int ) ( dtrc / 13.7 + .5 ) ;
if ( aperture > aperturemax )
aperture = aperturemax ;
if ( aperture < 2 )
aperture = 2 ;
}
i1 = i - aperture / 2 ;
i2 = i1 + aperture - 1 ;
if ( i1 < region - > r1 )
i1 = region - > r1 ;
if ( i2 > region - > r2 )
i2 = region - > r2 ;
pt = ( int ) ( ( i2 - i1 + 1 ) * gtr_in * src_dpi + .5 ) ; /* pixel count threshold */
if ( pt < 1 )
pt = 1 ;
/* Sum over row aperture */
for ( sum = 0 , ii = i1 ; ii < = i2 ; sum + = rowcount [ ii ] , ii + + )
;
/* Does row have few enough black pixels to be considered blank? */
if ( ( rowthresh [ i - region - > r1 ] = 10 * sum / pt ) < = 40 ) {
if ( dtrc > 0 ) {
breakinfo - > rhmean_pixels + = dtrc ;
ntr + + ;
}
dtrc = 0 ;
} else
dtrc + + ;
}
if ( dtrc > 0 ) {
breakinfo - > rhmean_pixels + = dtrc ;
ntr + + ;
}
if ( ntr > 0 )
breakinfo - > rhmean_pixels / = ntr ;
/*
printf ( " rhmean=%d (ntr=%d) \n " , breakinfo - > rhmean_pixels , ntr ) ;
{
FILE * f ;
static int count = 0 ;
f = fopen ( " rthresh.ep " , count = = 0 ? " w " : " a " ) ;
count + + ;
for ( i = region - > r1 ; i < = region - > r2 ; i + + )
nprintf ( f , " %d \n " , rowthresh [ i - region - > r1 ] ) ;
nprintf ( f , " //nc \n " ) ;
fclose ( f ) ;
}
*/
/* Minimum text row height required (pixels) */
rhmin_pix = breakinfo - > rhmean_pixels / 3 ;
if ( rhmin_pix < .04 * src_dpi )
rhmin_pix = .04 * src_dpi ;
if ( rhmin_pix > .13 * src_dpi )
rhmin_pix = .13 * src_dpi ;
if ( rhmin_pix < 1 )
rhmin_pix = 1 ;
/*
for ( rmax = region - > r2 ; rmax > region - > r1 ; rmax - - )
if ( rowthresh [ rmax - region - > r1 ] > 10 )
break ;
*/
/* Look for "row" gaps in the region so that it can be broken into */
/* multiple "rows". */
breakinfo - > n = 0 ;
for ( labelrow = figrow = - 1 , dtrc = trc = brc = 0 , i = region - > r1 ;
i < = region - > r2 ; i + + ) {
/* Does row have few enough black pixels to be considered blank? */
if ( rowthresh [ i - region - > r1 ] < = 10 ) {
trc = 0 ;
brc + + ;
/*
* * Max allowed white space between rows = max_vertical_gap_inches
*/
if ( dtrc = = 0 ) {
if ( brc > brcmin )
newregion - > r1 + + ;
continue ;
}
/*
* * Big enough blank gap , so add one row / line
*/
if ( dtrc + brc > = rhmin_pix ) {
int i0 , iopt ;
double region_height_inches ;
double gap_inches ;
if ( dtrc < src_dpi * 0.02 )
dtrc = src_dpi * 0.02 ;
if ( dtrc < 2 )
dtrc = 2 ;
/* Look for more optimum point */
for ( i0 = iopt = i ; i < = region - > r2 & & i - i0 < dtrc ; i + + ) {
if ( rowthresh [ i - region - > r1 ]
< rowthresh [ iopt - region - > r1 ] ) {
iopt = i ;
if ( rowthresh [ i - region - > r1 ] = = 0 )
break ;
}
if ( rowthresh [ i - region - > r1 ] > 100 )
break ;
}
/* If at end of region and haven't found perfect break, stay at end */
if ( i > region - > r2 & & rowthresh [ iopt - region - > r1 ] > 0 )
i = region - > r2 ;
else
i = iopt ;
newregion - > r2 = i - 1 ;
region_height_inches = ( double ) ( newregion - > r2 - newregion - > r1
+ 1 ) / src_dpi ;
/* Could this region be a figure? */
if ( figrow < 0 & & region_height_inches > = min_fig_height ) {
/* If so, set figrow and don't process it yet. */
figrow = newregion - > r1 ;
labelrow = - 1 ;
newregion - > r1 = i ;
dtrc = trc = 0 ;
brc = 1 ;
continue ;
}
/* Are we processing a figure? */
if ( figrow > = 0 ) {
/* Compute most recent gap */
if ( labelrow > = 0 )
gap_inches = ( double ) ( labelrow - newregion - > r1 )
/ src_dpi ;
else
gap_inches = - 1. ;
/* If gap and region height are small enough, tack them on to the figure. */
if ( region_height_inches < max_label_height
& & gap_inches > 0. & & gap_inches < max_fig_gap )
newregion - > r1 = figrow ;
else {
/* Not small enough--dump the previous figure. */
newregion - > r2 = newregion - > r1 - 1 ;
newregion - > r1 = figrow ;
newregion - > c1 = region - > c1 ;
newregion - > c2 = region - > c2 ;
bmpregion_trim_margins ( newregion , colcount , rowcount ,
0x1f ) ;
if ( newregion - > r2 > newregion - > r1 )
textrow_assign_bmpregion (
& breakinfo - > textrow [ breakinfo - > n + + ] ,
newregion ) ;
if ( gap_inches > 0. & & gap_inches < max_fig_gap ) {
/* This new region might be a figure--set it as the new figure */
/* and don't dump it yet. */
figrow = newregion - > r2 + 1 ;
labelrow = - 1 ;
newregion - > r1 = i ;
dtrc = trc = 0 ;
brc = 1 ;
continue ;
} else {
newregion - > r1 = newregion - > r2 + 1 ;
newregion - > r2 = i - 1 ;
}
}
/* Cancel figure processing */
figrow = - 1 ;
labelrow = - 1 ;
}
/*
if ( newregion - > r2 > = rmax )
i = newregion - > r2 = region - > r2 ;
*/
newregion - > c1 = region - > c1 ;
newregion - > c2 = region - > c2 ;
bmpregion_trim_margins ( newregion , colcount , rowcount , 0x1f ) ;
if ( newregion - > r2 > newregion - > r1 )
textrow_assign_bmpregion (
& breakinfo - > textrow [ breakinfo - > n + + ] , newregion ) ;
newregion - > r1 = i ;
dtrc = trc = 0 ;
brc = 1 ;
}
} else {
if ( figrow > = 0 & & labelrow < 0 )
labelrow = i ;
dtrc + + ;
trc + + ;
brc = 0 ;
}
}
newregion - > r2 = region - > r2 ;
if ( dtrc > 0 & & newregion - > r2 - newregion - > r1 + 1 > 0 ) {
/* If we were processing a figure, include it. */
if ( figrow > = 0 )
newregion - > r1 = figrow ;
newregion - > c1 = region - > c1 ;
newregion - > c2 = region - > c2 ;
bmpregion_trim_margins ( newregion , colcount , rowcount , 0x1f ) ;
if ( newregion - > r2 > newregion - > r1 )
textrow_assign_bmpregion ( & breakinfo - > textrow [ breakinfo - > n + + ] ,
newregion ) ;
}
/* Compute gaps between rows and row heights */
breakinfo_compute_row_gaps ( breakinfo , region - > r2 ) ;
willus_dmem_free ( 15 , ( double * * ) & rowthresh , funcname ) ;
}
static void textrow_assign_bmpregion ( TEXTROW * textrow , BMPREGION * region )
{
textrow - > r1 = region - > r1 ;
textrow - > r2 = region - > r2 ;
textrow - > c1 = region - > c1 ;
textrow - > c2 = region - > c2 ;
textrow - > rowbase = region - > rowbase ;
textrow - > lcheight = region - > lcheight ;
textrow - > capheight = region - > capheight ;
textrow - > h5050 = region - > h5050 ;
}
static void breakinfo_compute_row_gaps ( BREAKINFO * breakinfo , int r2 )
{
int i , n ;
n = breakinfo - > n ;
if ( n < = 0 )
return ;
breakinfo - > textrow [ 0 ] . rowheight = breakinfo - > textrow [ 0 ] . r2
- breakinfo - > textrow [ 0 ] . r1 ;
for ( i = 0 ; i < n - 1 ; i + + )
breakinfo - > textrow [ i ] . gap = breakinfo - > textrow [ i + 1 ] . r1
- breakinfo - > textrow [ i ] . rowbase - 1 ;
/*
breakinfo - > textrow [ i ] . rowheight = breakinfo - > textrow [ i + 1 ] . r1 - breakinfo - > textrow [ i ] . r1 ;
*/
for ( i = 1 ; i < n ; i + + )
breakinfo - > textrow [ i ] . rowheight = breakinfo - > textrow [ i ] . rowbase
- breakinfo - > textrow [ i - 1 ] . rowbase ;
breakinfo - > textrow [ n - 1 ] . gap = r2 - breakinfo - > textrow [ n - 1 ] . rowbase ;
}
static void breakinfo_compute_col_gaps ( BREAKINFO * breakinfo , int c2 )
{
int i , n ;
n = breakinfo - > n ;
if ( n < = 0 )
return ;
for ( i = 0 ; i < n - 1 ; i + + ) {
breakinfo - > textrow [ i ] . gap = breakinfo - > textrow [ i + 1 ] . c1
- breakinfo - > textrow [ i ] . c2 - 1 ;
breakinfo - > textrow [ i ] . rowheight = breakinfo - > textrow [ i + 1 ] . c1
- breakinfo - > textrow [ i ] . c1 ;
}
breakinfo - > textrow [ n - 1 ] . gap = c2 - breakinfo - > textrow [ n - 1 ] . c2 ;
breakinfo - > textrow [ n - 1 ] . rowheight = breakinfo - > textrow [ n - 1 ] . c2
- breakinfo - > textrow [ n - 1 ] . c1 ;
}
static void breakinfo_remove_small_col_gaps ( BREAKINFO * breakinfo , int lcheight ,
double mingap )
{
int i , j ;
if ( mingap < word_spacing )
mingap = word_spacing ;
for ( i = 0 ; i < breakinfo - > n - 1 ; i + + ) {
double gap ;
gap = ( double ) breakinfo - > textrow [ i ] . gap / lcheight ;
if ( gap > = mingap )
continue ;
breakinfo - > textrow [ i ] . c2 = breakinfo - > textrow [ i + 1 ] . c2 ;
breakinfo - > textrow [ i ] . gap = breakinfo - > textrow [ i + 1 ] . gap ;
if ( breakinfo - > textrow [ i + 1 ] . r1 < breakinfo - > textrow [ i ] . r1 )
breakinfo - > textrow [ i ] . r1 = breakinfo - > textrow [ i + 1 ] . r1 ;
if ( breakinfo - > textrow [ i + 1 ] . r2 > breakinfo - > textrow [ i ] . r2 )
breakinfo - > textrow [ i ] . r2 = breakinfo - > textrow [ i + 1 ] . r2 ;
for ( j = i + 1 ; j < breakinfo - > n - 1 ; j + + )
breakinfo - > textrow [ j ] = breakinfo - > textrow [ j + 1 ] ;
breakinfo - > n - - ;
i - - ;
}
}
static void breakinfo_remove_small_rows ( BREAKINFO * breakinfo , double fracrh ,
double fracgap , BMPREGION * region , int * colcount , int * rowcount )
{
int i , j , mg , mh , mg0 , mg1 ;
int c1 , c2 , nc ;
int * rh , * gap ;
static char * funcname = " breakinfo_remove_small_rows " ;
# if (WILLUSDEBUGX & 2)
printf ( " @breakinfo_remove_small_rows(fracrh=%g,fracgap=%g) \n " , fracrh , fracgap ) ;
# endif
if ( breakinfo - > n < 2 )
return ;
c1 = region - > c1 ;
c2 = region - > c2 ;
nc = c2 - c1 + 1 ;
willus_dmem_alloc_warn ( 16 , ( void * * ) & rh , 2 * sizeof ( int ) * breakinfo - > n ,
funcname , 10 ) ;
gap = & rh [ breakinfo - > n ] ;
for ( i = 0 ; i < breakinfo - > n ; i + + ) {
rh [ i ] = breakinfo - > textrow [ i ] . r2 - breakinfo - > textrow [ i ] . r1 + 1 ;
if ( i < breakinfo - > n - 1 )
gap [ i ] = breakinfo - > textrow [ i ] . gap ;
}
sorti ( rh , breakinfo - > n ) ;
sorti ( gap , breakinfo - > n - 1 ) ;
mh = rh [ breakinfo - > n / 2 ] ;
mh * = fracrh ;
if ( mh < 1 )
mh = 1 ;
mg0 = gap [ ( breakinfo - > n - 1 ) / 2 ] ;
mg = mg0 * fracgap ;
mg1 = mg0 * 0.7 ;
if ( mg < 1 )
mg = 1 ;
# if (WILLUSDEBUGX & 2)
printf ( " mh = %d x %g = %d \n " , rh [ breakinfo - > n / 2 ] , fracrh , mh ) ;
printf ( " mg = %d x %g = %d \n " , gap [ breakinfo - > n / 2 ] , fracgap , mg ) ;
# endif
for ( i = 0 ; i < breakinfo - > n ; i + + ) {
TEXTROW * textrow ;
int trh , gs1 , gs2 , g1 , g2 , gap_is_big , row_too_small ;
double m1 , m2 , row_width_inches ;
textrow = & breakinfo - > textrow [ i ] ;
trh = textrow - > r2 - textrow - > r1 + 1 ;
if ( i = = 0 ) {
g1 = mg0 + 1 ;
gs1 = mg + 1 ;
} else {
g1 = textrow - > r1 - breakinfo - > textrow [ i - 1 ] . r2 - 1 ;
gs1 = breakinfo - > textrow [ i - 1 ] . gap ;
}
if ( i = = breakinfo - > n - 1 ) {
g2 = mg0 + 1 ;
gs2 = mg + 1 ;
} else {
g2 = breakinfo - > textrow [ i + 1 ] . r1 - textrow - > r2 - 1 ;
gs2 = breakinfo - > textrow [ i ] . gap ;
}
# if (WILLUSDEBUGX & 2)
printf ( " rowheight[%d] = %d, mh=%d, gs1=%d, gs2=%d \n " , i , trh , gs1 , gs2 ) ;
# endif
gap_is_big = ( trh > = mh | | ( gs1 > = mg & & gs2 > = mg ) ) ;
/*
* * Is the row width small and centered ? If so , it should probably
* * be attached to its nearest neighbor - - it ' s usually a fragment of
* * an equation or a table / figure .
*/
row_width_inches = ( double ) ( textrow - > c2 - textrow - > c1 + 1 ) / src_dpi ;
m1 = fabs ( textrow - > c1 - c1 ) / nc ;
m2 = fabs ( textrow - > c2 - c2 ) / nc ;
row_too_small = m1 > 0.1 & & m2 > 0.1
& & row_width_inches < little_piece_threshold_inches
& & ( g1 < = mg1 | | g2 < = mg1 ) ;
# if (WILLUSDEBUGX & 2)
printf ( " m1=%g, m2=%g, rwi=%g, g1=%d, g2=%d, mg0=%d \n " , m1 , m2 , row_width_inches , g1 , g2 , mg0 ) ;
# endif
if ( gap_is_big & & ! row_too_small )
continue ;
# if (WILLUSDEBUGX & 2)
printf ( " row[%d] to be combined w/next row. \n " , i ) ;
# endif
if ( row_too_small ) {
if ( g1 < g2 )
i - - ;
} else {
if ( gs1 < gs2 )
i - - ;
}
/*
printf ( " Removing row. nrows=%d, rh=%d, gs1=%d, gs2=%d \n " , breakinfo - > n , trh , gs1 , gs2 ) ;
printf ( " mh = %d, mg = %d \n " , rh [ breakinfo - > n / 2 ] , gap [ ( breakinfo - > n - 1 ) / 2 ] ) ;
*/
breakinfo - > textrow [ i ] . r2 = breakinfo - > textrow [ i + 1 ] . r2 ;
if ( breakinfo - > textrow [ i + 1 ] . c2 > breakinfo - > textrow [ i ] . c2 )
breakinfo - > textrow [ i ] . c2 = breakinfo - > textrow [ i + 1 ] . c2 ;
if ( breakinfo - > textrow [ i + 1 ] . c1 < breakinfo - > textrow [ i ] . c1 )
breakinfo - > textrow [ i ] . c1 = breakinfo - > textrow [ i + 1 ] . c1 ;
/* Re-compute rowbase, capheight, lcheight */
{
BMPREGION newregion ;
newregion = ( * region ) ;
newregion . c1 = breakinfo - > textrow [ i ] . c1 ;
newregion . c2 = breakinfo - > textrow [ i ] . c2 ;
newregion . r1 = breakinfo - > textrow [ i ] . r1 ;
newregion . r2 = breakinfo - > textrow [ i ] . r2 ;
bmpregion_trim_margins ( & newregion , colcount , rowcount , 0x1f ) ;
newregion . c1 = breakinfo - > textrow [ i ] . c1 ;
newregion . c2 = breakinfo - > textrow [ i ] . c2 ;
newregion . r1 = breakinfo - > textrow [ i ] . r1 ;
newregion . r2 = breakinfo - > textrow [ i ] . r2 ;
textrow_assign_bmpregion ( & breakinfo - > textrow [ i ] , & newregion ) ;
}
for ( j = i + 1 ; j < breakinfo - > n - 1 ; j + + )
breakinfo - > textrow [ j ] = breakinfo - > textrow [ j + 1 ] ;
breakinfo - > n - - ;
i - - ;
}
willus_dmem_free ( 16 , ( double * * ) & rh , funcname ) ;
}
static void breakinfo_alloc ( int index , BREAKINFO * breakinfo , int nrows )
{
static char * funcname = " breakinfo_alloc " ;
willus_dmem_alloc_warn ( index , ( void * * ) & breakinfo - > textrow ,
sizeof ( TEXTROW ) * ( nrows / 2 + 2 ) , funcname , 10 ) ;
}
static void breakinfo_free ( int index , BREAKINFO * breakinfo )
{
static char * funcname = " breakinfo_free " ;
willus_dmem_free ( index , ( double * * ) & breakinfo - > textrow , funcname ) ;
}
static void breakinfo_sort_by_gap ( BREAKINFO * breakinfo )
{
int n , top , n1 ;
TEXTROW * x , x0 ;
x = breakinfo - > textrow ;
n = breakinfo - > n ;
if ( n < 2 )
return ;
top = n / 2 ;
n1 = n - 1 ;
while ( 1 ) {
if ( top > 0 ) {
top - - ;
x0 = x [ top ] ;
} else {
x0 = x [ n1 ] ;
x [ n1 ] = x [ 0 ] ;
n1 - - ;
if ( ! n1 ) {
x [ 0 ] = x0 ;
return ;
}
}
{
int parent , child ;
parent = top ;
child = top * 2 + 1 ;
while ( child < = n1 ) {
if ( child < n1 & & x [ child ] . gap < x [ child + 1 ] . gap )
child + + ;
if ( x0 . gap < x [ child ] . gap ) {
x [ parent ] = x [ child ] ;
parent = child ;
child + = ( parent + 1 ) ;
} else
break ;
}
x [ parent ] = x0 ;
}
}
}
static void breakinfo_sort_by_row_position ( BREAKINFO * breakinfo )
{
int n , top , n1 ;
TEXTROW * x , x0 ;
x = breakinfo - > textrow ;
n = breakinfo - > n ;
if ( n < 2 )
return ;
top = n / 2 ;
n1 = n - 1 ;
while ( 1 ) {
if ( top > 0 ) {
top - - ;
x0 = x [ top ] ;
} else {
x0 = x [ n1 ] ;
x [ n1 ] = x [ 0 ] ;
n1 - - ;
if ( ! n1 ) {
x [ 0 ] = x0 ;
return ;
}
}
{
int parent , child ;
parent = top ;
child = top * 2 + 1 ;
while ( child < = n1 ) {
if ( child < n1 & & x [ child ] . r1 < x [ child + 1 ] . r1 )
child + + ;
if ( x0 . r1 < x [ child ] . r1 ) {
x [ parent ] = x [ child ] ;
parent = child ;
child + = ( parent + 1 ) ;
} else
break ;
}
x [ parent ] = x0 ;
}
}
}
/*
* * Add a vertically - contiguous rectangular region to the destination bitmap .
* * The rectangular region may be broken up horizontally ( wrapped ) .
*/
static void bmpregion_one_row_find_breaks ( BMPREGION * region ,
BREAKINFO * breakinfo , int * colcount , int * rowcount , int add_to_dbase )
{
int nc , i , mingap , col0 , dr , thlow , thhigh ;
int * bp ;
BMPREGION * newregion , _newregion ;
static char * funcname = " bmpregion_one_row_find_breaks " ;
if ( debug )
printf ( " @bmpregion_one_row_find_breaks(%d,%d)-(%d,%d) \n " , region - > c1 ,
region - > r1 , region - > c2 , region - > r2 ) ;
newregion = & _newregion ;
( * newregion ) = ( * region ) ;
bmpregion_trim_margins ( newregion , colcount , rowcount , 0x1f ) ;
region - > lcheight = newregion - > lcheight ;
region - > capheight = newregion - > capheight ;
region - > rowbase = newregion - > rowbase ;
region - > h5050 = newregion - > h5050 ;
nc = newregion - > c2 - newregion - > c1 + 1 ;
breakinfo - > n = 0 ;
if ( nc < 6 )
return ;
/*
* * Look for " space-sized " gaps , i . e . gaps that would occur between words .
* * Use this as pixel counting aperture .
*/
dr = newregion - > lcheight ;
mingap = dr * word_spacing * 0.8 ;
if ( mingap < 2 )
mingap = 2 ;
/*
* * Find places where there are gaps ( store in bp array )
* * Could do this more intelligently - - maybe calculate a histogram ?
*/
willus_dmem_alloc_warn ( 18 , ( void * * ) & bp , sizeof ( int ) * nc , funcname , 10 ) ;
for ( i = 0 ; i < nc ; i + + )
bp [ i ] = 0 ;
if ( src_left_to_right ) {
for ( i = newregion - > c1 ; i < = newregion - > c2 ; i + + ) {
int i1 , i2 , pt , sum , ii ;
i1 = i - mingap / 2 ;
i2 = i1 + mingap - 1 ;
if ( i1 < newregion - > c1 )
i1 = newregion - > c1 ;
if ( i2 > newregion - > c2 )
i2 = newregion - > c2 ;
pt = ( int ) ( ( i2 - i1 + 1 ) * gtw_in * src_dpi + .5 ) ;
if ( pt < 1 )
pt = 1 ;
for ( sum = 0 , ii = i1 ; ii < = i2 ; ii + + , sum + = colcount [ ii ] )
;
bp [ i - newregion - > c1 ] = 10 * sum / pt ;
}
} else {
for ( i = newregion - > c2 ; i > = newregion - > c1 ; i - - ) {
int i1 , i2 , pt , sum , ii ;
i1 = i - mingap / 2 ;
i2 = i1 + mingap - 1 ;
if ( i1 < newregion - > c1 )
i1 = newregion - > c1 ;
if ( i2 > newregion - > c2 )
i2 = newregion - > c2 ;
pt = ( int ) ( ( i2 - i1 + 1 ) * gtw_in * src_dpi + .5 ) ;
if ( pt < 1 )
pt = 1 ;
for ( sum = 0 , ii = i1 ; ii < = i2 ; ii + + , sum + = colcount [ ii ] )
;
bp [ i - newregion - > c1 ] = 10 * sum / pt ;
}
}
# if (WILLUSDEBUGX & 4)
if ( region - > r1 > 3699 & & region - > r1 < 3750 )
{
static int a = 0 ;
FILE * f ;
f = fopen ( " outbp.ep " , a = = 0 ? " w " : " a " ) ;
a + + ;
fprintf ( f , " /sa l \" (%d,%d)-(%d,%d) lch=%d \" 2 \n " , region - > c1 , region - > r1 , region - > c2 , region - > r2 , region - > lcheight ) ;
for ( i = 0 ; i < nc ; i + + )
fprintf ( f , " %d \n " , bp [ i ] ) ;
fprintf ( f , " //nc \n " ) ;
fclose ( f ) ;
}
# endif
thlow = 10 ;
thhigh = 50 ;
/*
* * Break into pieces
*/
for ( col0 = newregion - > c1 ; col0 < = newregion - > c2 ; col0 + + ) {
int copt , c0 ;
BMPREGION xregion ;
xregion = ( * newregion ) ;
xregion . c1 = col0 ;
for ( ; col0 < = newregion - > c2 ; col0 + + )
if ( bp [ col0 - newregion - > c1 ] > = thhigh )
break ;
if ( col0 > newregion - > c2 )
break ;
for ( col0 + + ; col0 < = newregion - > c2 ; col0 + + )
if ( bp [ col0 - newregion - > c1 ] < thlow )
break ;
for ( copt = c0 = col0 ; col0 < = newregion - > c2 & & col0 - c0 < = dr ;
col0 + + ) {
if ( bp [ col0 - newregion - > c1 ] < bp [ copt - newregion - > c1 ] )
copt = col0 ;
if ( bp [ col0 - newregion - > c1 ] > thhigh )
break ;
}
if ( copt > newregion - > c2 )
copt = newregion - > c2 ;
xregion . c2 = copt ;
if ( xregion . c2 - xregion . c1 < 2 )
continue ;
bmpregion_trim_margins ( & xregion , colcount , rowcount , 0x1f ) ;
textrow_assign_bmpregion ( & breakinfo - > textrow [ breakinfo - > n + + ] , & xregion ) ;
col0 = copt ;
if ( copt = = newregion - > c2 )
break ;
}
breakinfo_compute_col_gaps ( breakinfo , newregion - > c2 ) ;
willus_dmem_free ( 18 , ( double * * ) & bp , funcname ) ;
/* Remove small gaps */
{
double median_gap ;
word_gaps_add ( add_to_dbase ? breakinfo : NULL , region - > lcheight ,
& median_gap ) ;
breakinfo_remove_small_col_gaps ( breakinfo , region - > lcheight ,
median_gap / 1.9 ) ;
}
}
/*
* * pi = preserve indentation
*/
static void bmpregion_one_row_wrap_and_add ( BMPREGION * region ,
BREAKINFO * rowbreakinfo , int index , int i1 , int i2 ,
MASTERINFO * masterinfo , int justflags , int * colcount , int * rowcount ,
PAGEINFO * pageinfo , int line_spacing , int mean_row_gap , int rowbase ,
int marking_flags , int pi )
{
int nc , nr , i , i0 , gappix ;
double aspect_ratio , region_height ;
BREAKINFO * colbreaks , _colbreaks ;
BMPREGION * newregion , _newregion ;
# if (WILLUSDEBUGX & 4)
printf ( " @bmpregion_one_row_wrap_and_add, index=%d, i1=%d, i2=%d \n " , index , i1 , i2 ) ;
# endif
newregion = & _newregion ;
( * newregion ) = ( * region ) ;
bmpregion_trim_margins ( newregion , colcount , rowcount , 0xf ) ;
nc = newregion - > c2 - newregion - > c1 + 1 ;
nr = newregion - > r2 - newregion - > r1 + 1 ;
if ( nc < 6 )
return ;
aspect_ratio = ( double ) nr / nc ;
region_height = ( double ) nr / src_dpi ;
if ( aspect_ratio > no_wrap_ar_limit
& & region_height > no_wrap_height_limit_inches ) {
newregion - > r1 = region - > r1 ;
newregion - > r2 = region - > r2 ;
# ifdef WILLUSDEBUG
printf ( " wrapflush6 \n " ) ;
# endif
wrapbmp_flush ( masterinfo , 0 , pageinfo , 1 ) ;
if ( index > i1 )
dst_add_gap_src_pixels ( " Tall region " , masterinfo ,
rowbreakinfo - > textrow [ index - 1 ] . gap ) ;
bmpregion_add ( newregion , rowbreakinfo , masterinfo , 0 , 0xf , 0 , - 1.0 , 0 ,
2 , colcount , rowcount , pageinfo , 0xf ,
rowbreakinfo - > textrow [ index ] . r2
- rowbreakinfo - > textrow [ index ] . rowbase ) ;
if ( index < i2 )
gap_override_internal = rowbreakinfo - > textrow [ index ] . gap ;
return ;
}
colbreaks = & _colbreaks ;
colbreaks - > textrow = NULL ;
breakinfo_alloc ( 106 , colbreaks , newregion - > c2 - newregion - > c1 + 1 ) ;
bmpregion_one_row_find_breaks ( newregion , colbreaks , colcount , rowcount , 1 ) ;
if ( pi & & colbreaks - > n > 0 ) {
if ( src_left_to_right )
colbreaks - > textrow [ 0 ] . c1 = region - > c1 ;
else
colbreaks - > textrow [ colbreaks - > n - 1 ] . c2 = region - > c2 ;
}
/*
hs = 0. ;
for ( i = 0 ; i < colbreaks - > n ; i + + )
hs + = ( colbreaks - > textrow [ i ] . r2 - colbreaks - > textrow [ i ] . r1 ) ;
hs / = colbreaks - > n ;
*/
/*
* * Find appropriate letter height to use for word spacing
*/
{
double median_gap ;
word_gaps_add ( NULL , newregion - > lcheight , & median_gap ) ;
gappix = ( int ) ( median_gap * newregion - > lcheight + .5 ) ;
}
# if (WILLUSDEBUGX & 4)
printf ( " Before small gap removal, column breaks: \n " ) ;
breakinfo_echo ( colbreaks ) ;
# endif
# if (WILLUSDEBUGX & 4)
printf ( " After small gap removal, column breaks: \n " ) ;
breakinfo_echo ( colbreaks ) ;
# endif
if ( show_marked_source )
for ( i = 0 ; i < colbreaks - > n ; i + + ) {
BMPREGION xregion ;
xregion = ( * newregion ) ;
xregion . c1 = colbreaks - > textrow [ i ] . c1 ;
xregion . c2 = colbreaks - > textrow [ i ] . c2 ;
mark_source_page ( & xregion , 2 , marking_flags ) ;
}
# if (WILLUSDEBUGX & 4)
for ( i = 0 ; i < colbreaks - > n ; i + + )
printf ( " colbreak[%d] = %d - %d \n " , i , colbreaks - > textrow [ i ] . c1 , colbreaks - > textrow [ i ] . c2 ) ;
# endif
/* Maybe skip gaps < 0.5*median_gap or collect gap/rowheight ratios and skip small gaps */
/* (Could be thrown off by full-justified articles where some lines have big gaps.) */
/* Need do call a separate function that removes these gaps. */
for ( i0 = 0 ; i0 < colbreaks - > n ; ) {
int i1 , i2 , toolong , rw , remaining_width_pixels ;
BMPREGION reg ;
toolong = 0 ; /* Avoid compiler warning */
for ( i = i0 ; i < colbreaks - > n ; i + + ) {
int wordgap ;
wordgap = wrapbmp_ends_in_hyphen ( ) ? 0 : gappix ;
i1 = src_left_to_right ? i0 : colbreaks - > n - 1 - i ;
i2 = src_left_to_right ? i : colbreaks - > n - 1 - i0 ;
rw = ( colbreaks - > textrow [ i2 ] . c2 - colbreaks - > textrow [ i1 ] . c1 + 1 ) ;
remaining_width_pixels = wrapbmp_remaining ( ) ;
toolong = ( rw + wordgap > remaining_width_pixels ) ;
# if (WILLUSDEBUGX & 4)
printf ( " i1=%d, i2=%d, rw=%d, rw+gap=%d, remainder=%d, toolong=%d \n " , i1 , i2 , rw , rw + wordgap , remaining_width_pixels , toolong ) ;
# endif
/*
* * If we ' re too long with just one word and there is already
* * stuff on the queue , then flush it and re - evaluate .
*/
if ( i = = i0 & & toolong & & wrapbmp_width ( ) > 0 ) {
# ifdef WILLUSDEBUG
printf ( " wrapflush8 \n " ) ;
# endif
wrapbmp_flush ( masterinfo , 1 , pageinfo , 0 ) ;
i - - ;
continue ;
}
/*
* * If we ' re not too long and we ' re not done yet , add another word .
*/
if ( i < colbreaks - > n - 1 & & ! toolong )
continue ;
/*
* * Add the regions from i0 to i ( or i0 to i - 1 )
*/
break ;
}
if ( i > i0 & & toolong )
i - - ;
i1 = src_left_to_right ? i0 : colbreaks - > n - 1 - i ;
i2 = src_left_to_right ? i : colbreaks - > n - 1 - i0 ;
reg = ( * newregion ) ;
reg . c1 = colbreaks - > textrow [ i1 ] . c1 ;
reg . c2 = colbreaks - > textrow [ i2 ] . c2 ;
# if (WILLUSDEBUGX & 4)
printf ( " Adding i1=%d to i2=%d \n " , i1 , i2 ) ;
# endif
/* Trim the word top/bottom */
bmpregion_trim_margins ( & reg , colcount , rowcount , 0xc ) ;
reg . c1 = colbreaks - > textrow [ i1 ] . c1 ;
reg . c2 = colbreaks - > textrow [ i2 ] . c2 ;
reg . lcheight = newregion - > lcheight ;
reg . capheight = newregion - > capheight ;
reg . rowbase = newregion - > rowbase ;
reg . h5050 = newregion - > h5050 ;
if ( reg . r1 > reg . rowbase )
reg . r1 = reg . rowbase ;
if ( reg . r2 < reg . rowbase )
reg . r2 = reg . rowbase ;
/* Add it to the existing line queue */
wrapbmp_add ( & reg , gappix , line_spacing , rowbase , mean_row_gap ,
justflags ) ;
if ( toolong ) {
# ifdef WILLUSDEBUG
printf ( " wrapflush7 \n " ) ;
# endif
wrapbmp_flush ( masterinfo , 1 , pageinfo , 0 ) ;
}
i0 = i + 1 ;
}
breakinfo_free ( 106 , colbreaks ) ;
}
static WILLUSBITMAP _wrapbmp , * wrapbmp ;
static int wrapbmp_base ;
static int wrapbmp_line_spacing ;
static int wrapbmp_gap ;
static int wrapbmp_bgcolor ;
static int wrapbmp_just ;
static int wrapbmp_rhmax ;
static int wrapbmp_thmax ;
static int wrapbmp_maxgap = 2 ;
static int wrapbmp_height_extended ;
static HYPHENINFO wrapbmp_hyphen ;
void wrapbmp_init ( void )
{
wrapbmp = & _wrapbmp ;
bmp_init ( wrapbmp ) ;
wrapbmp_set_color ( dst_color ) ;
wrapbmp - > width = 0 ;
wrapbmp - > height = 0 ;
wrapbmp_base = 0 ;
wrapbmp_line_spacing = - 1 ;
wrapbmp_gap = - 1 ;
wrapbmp_bgcolor = - 1 ;
wrapbmp_height_extended = 0 ;
wrapbmp_just = 0x8f ;
wrapbmp_rhmax = - 1 ;
wrapbmp_thmax = - 1 ;
wrapbmp_hyphen . ch = - 1 ;
just_flushed_internal = 0 ;
beginning_gap_internal = - 1 ;
last_h5050_internal = - 1 ;
}
static int wrapbmp_ends_in_hyphen ( void )
{
return ( wrapbmp_hyphen . ch > = 0 ) ;
}
static void wrapbmp_set_color ( int is_color )
{
if ( is_color )
wrapbmp - > bpp = 24 ;
else {
int i ;
wrapbmp - > bpp = 8 ;
for ( i = 0 ; i < 256 ; i + + )
wrapbmp - > red [ i ] = wrapbmp - > blue [ i ] = wrapbmp - > green [ i ] = i ;
}
}
static void wrapbmp_free ( void )
{
bmp_free ( wrapbmp ) ;
}
static void wrapbmp_set_maxgap ( int value )
{
wrapbmp_maxgap = value ;
}
static int wrapbmp_width ( void )
{
return ( wrapbmp - > width ) ;
}
static int wrapbmp_remaining ( void )
{
int maxpix , w ;
maxpix = max_region_width_inches * src_dpi ;
/* Don't include hyphen if wrapbmp ends in a hyphen */
if ( wrapbmp_hyphen . ch < 0 )
w = wrapbmp - > width ;
else if ( src_left_to_right )
w = wrapbmp_hyphen . c2 + 1 ;
else
w = wrapbmp - > width - wrapbmp_hyphen . c2 ;
return ( maxpix - w ) ;
}
/*
* * region = bitmap region to add to line
* * gap = horizontal pixel gap between existing region and region being added
* * line_spacing = desired spacing between lines of text ( pixels )
* * rbase = position of baseline in region
* * gio = gap if over - - gap above top of text if it goes over line_spacing .
*/
// static int bcount=0;
static void wrapbmp_add ( BMPREGION * region , int gap , int line_spacing , int rbase ,
int gio , int just_flags )
{
WILLUSBITMAP * tmp , _tmp ;
int i , rh , th , bw , new_base , h2 , bpp , width0 ;
// static char filename[256];
# ifdef WILLUSDEBUG
printf ( " @wrapbmp_add %d x %d (w=%d). \n " , region - > c2 - region - > c1 + 1 , region - > r2 - region - > r1 + 1 , wrapbmp - > width ) ;
# endif
bmpregion_hyphen_detect ( region ) ; /* Figure out if what we're adding ends in a hyphen */
if ( wrapbmp_ends_in_hyphen ( ) )
gap = 0 ;
wrapbmp_hyphen_erase ( ) ;
just_flushed_internal = 0 ; // Reset "just flushed" flag
beginning_gap_internal = - 1 ; // Reset top-of-page or top-of-column gap
last_h5050_internal = - 1 ; // Reset last row font size
if ( line_spacing > wrapbmp_line_spacing )
wrapbmp_line_spacing = line_spacing ;
if ( gio > wrapbmp_gap )
wrapbmp_gap = gio ;
wrapbmp_bgcolor = region - > bgcolor ;
wrapbmp_just = just_flags ;
/*
printf ( " c1=%d, c2=%d, r1=%d, r2=%d \n " , region - > c1 , region - > c2 , region - > r1 , region - > r2 ) ;
printf ( " gap=%d, line_spacing=%d, rbase=%d, gio=%d \n " , gap , line_spacing , rbase , gio ) ;
*/
bpp = dst_color ? 3 : 1 ;
rh = rbase - region - > r1 + 1 ;
if ( rh > wrapbmp_rhmax )
wrapbmp_rhmax = rh ;
th = rh + ( region - > r2 - rbase ) ;
if ( th > wrapbmp_thmax )
wrapbmp_thmax = th ;
/*
{
WILLUSBITMAP * bmp , _bmp ;
bmp = & _bmp ;
bmp_init ( bmp ) ;
bmp - > height = region - > r2 - region - > r1 + 1 ;
bmp - > width = region - > c2 - region - > c1 + 1 ;
bmp - > bpp = bpp * 8 ;
if ( bpp = = 1 )
for ( i = 0 ; i < 256 ; i + + )
bmp - > red [ i ] = bmp - > blue [ i ] = bmp - > green [ i ] = i ;
bmp_alloc ( bmp ) ;
bw = bmp_bytewidth ( bmp ) ;
memset ( bmp_rowptr_from_top ( bmp , 0 ) , 255 , bw * bmp - > height ) ;
for ( i = region - > r1 ; i < = region - > r2 ; i + + )
{
unsigned char * d , * s ;
d = bmp_rowptr_from_top ( bmp , i - region - > r1 ) ;
s = bmp_rowptr_from_top ( dst_color ? region - > bmp : region - > bmp8 , i ) + bpp * region - > c1 ;
if ( i = = rbase )
memset ( d , 0 , bw ) ;
else
memcpy ( d , s , bw ) ;
}
sprintf ( filename , " out%05d.png " , bcount + + ) ;
bmp_write ( bmp , filename , stdout , 100 ) ;
bmp_free ( bmp ) ;
}
*/
if ( wrapbmp - > width = = 0 ) {
/* Put appropriate gap in */
if ( last_rowbase_internal > = 0
& & rh < wrapbmp_line_spacing - last_rowbase_internal ) {
rh = wrapbmp_line_spacing - last_rowbase_internal ;
if ( rh < 2 )
rh = 2 ;
th = rh + ( region - > r2 - rbase ) ;
wrapbmp_height_extended = 0 ;
} else
wrapbmp_height_extended = ( last_rowbase_internal > = 0 ) ;
wrapbmp_base = rh - 1 ;
wrapbmp - > height = th ;
# ifdef WILLUSDEBUG
printf ( " @wrapbmp_add: bmpheight set to %d (wls=%d, lrbi=%d) \n " , wrapbmp - > height , wrapbmp_line_spacing , last_rowbase_internal ) ;
# endif
wrapbmp - > width = region - > c2 - region - > c1 + 1 ;
bmp_alloc ( wrapbmp ) ;
bw = bmp_bytewidth ( wrapbmp ) ;
memset ( bmp_rowptr_from_top ( wrapbmp , 0 ) , 255 , bw * wrapbmp - > height ) ;
for ( i = region - > r1 ; i < = region - > r2 ; i + + ) {
unsigned char * d , * s ;
d = bmp_rowptr_from_top ( wrapbmp , wrapbmp_base + ( i - rbase ) ) ;
s = bmp_rowptr_from_top ( dst_color ? region - > bmp : region - > bmp8 , i )
+ bpp * region - > c1 ;
memcpy ( d , s , bw ) ;
}
# ifdef WILLUSDEBUG
if ( wrapbmp - > height < = wrapbmp_base )
{
printf ( " 1. SCREEECH! \n " ) ;
printf ( " wrapbmp = %d x %d, base=%d \n " , wrapbmp - > width , wrapbmp - > height , wrapbmp_base ) ;
exit ( 10 ) ;
}
# endif
/* Copy hyphen info from added region */
wrapbmp_hyphen = region - > hyphen ;
if ( wrapbmp_ends_in_hyphen ( ) ) {
wrapbmp_hyphen . r1 + = ( wrapbmp_base - rbase ) ;
wrapbmp_hyphen . r2 + = ( wrapbmp_base - rbase ) ;
wrapbmp_hyphen . ch - = region - > c1 ;
wrapbmp_hyphen . c2 - = region - > c1 ;
}
return ;
}
width0 = wrapbmp - > width ; /* Starting wrapbmp width */
tmp = & _tmp ;
bmp_init ( tmp ) ;
bmp_copy ( tmp , wrapbmp ) ;
tmp - > width + = gap + region - > c2 - region - > c1 + 1 ;
if ( rh > wrapbmp_base ) {
wrapbmp_height_extended = 1 ;
new_base = rh - 1 ;
} else
new_base = wrapbmp_base ;
if ( region - > r2 - rbase > wrapbmp - > height - 1 - wrapbmp_base )
h2 = region - > r2 - rbase ;
else
h2 = wrapbmp - > height - 1 - wrapbmp_base ;
tmp - > height = new_base + h2 + 1 ;
bmp_alloc ( tmp ) ;
bw = bmp_bytewidth ( tmp ) ;
memset ( bmp_rowptr_from_top ( tmp , 0 ) , 255 , bw * tmp - > height ) ;
bw = bmp_bytewidth ( wrapbmp ) ;
/*
printf ( " 3. wbh=%d x %d, tmp=%d x %d x %d, new_base=%d, wbbase=%d \n " , wrapbmp - > width , wrapbmp - > height , tmp - > width , tmp - > height , tmp - > bpp , new_base , wrapbmp_base ) ;
*/
for ( i = 0 ; i < wrapbmp - > height ; i + + ) {
unsigned char * d , * s ;
d = bmp_rowptr_from_top ( tmp , i + new_base - wrapbmp_base )
+ ( src_left_to_right ? 0 : tmp - > width - 1 - wrapbmp - > width )
* bpp ;
s = bmp_rowptr_from_top ( wrapbmp , i ) ;
memcpy ( d , s , bw ) ;
}
bw = bpp * ( region - > c2 - region - > c1 + 1 ) ;
if ( region - > r1 + new_base - rbase < 0
| | region - > r2 + new_base - rbase > tmp - > height - 1 ) {
aprintf ( ANSI_YELLOW " INTERNAL ERROR--TMP NOT DIMENSIONED PROPERLY. \n " ) ;
aprintf ( " (%d-%d), tmp->height=%d \n " ANSI_NORMAL ,
region - > r1 + new_base - rbase , region - > r2 + new_base - rbase ,
tmp - > height ) ;
exit ( 10 ) ;
}
for ( i = region - > r1 ; i < = region - > r2 ; i + + ) {
unsigned char * d , * s ;
d = bmp_rowptr_from_top ( tmp , i + new_base - rbase )
+ ( src_left_to_right ? wrapbmp - > width + gap : 0 ) * bpp ;
s = bmp_rowptr_from_top ( dst_color ? region - > bmp : region - > bmp8 , i )
+ bpp * region - > c1 ;
memcpy ( d , s , bw ) ;
}
bmp_copy ( wrapbmp , tmp ) ;
bmp_free ( tmp ) ;
/* Copy region's hyphen info */
wrapbmp_hyphen = region - > hyphen ;
if ( wrapbmp_ends_in_hyphen ( ) ) {
wrapbmp_hyphen . r1 + = ( new_base - rbase ) ;
wrapbmp_hyphen . r2 + = ( new_base - rbase ) ;
if ( src_left_to_right ) {
wrapbmp_hyphen . ch + = width0 + gap - region - > c1 ;
wrapbmp_hyphen . c2 + = width0 + gap - region - > c1 ;
} else {
wrapbmp_hyphen . ch - = region - > c1 ;
wrapbmp_hyphen . c2 - = region - > c1 ;
}
}
wrapbmp_base = new_base ;
# ifdef WILLUSDEBUG
if ( wrapbmp - > height < = wrapbmp_base )
{
printf ( " 2. SCREEECH! \n " ) ;
printf ( " wrapbmp = %d x %d, base=%d \n " , wrapbmp - > width , wrapbmp - > height , wrapbmp_base ) ;
exit ( 10 ) ;
}
# endif
}
static void wrapbmp_flush ( MASTERINFO * masterinfo , int allow_full_justification ,
PAGEINFO * pageinfo , int use_bgi )
{
BMPREGION region ;
WILLUSBITMAP * bmp8 , _bmp8 ;
int gap , just , nomss , dh ;
int * colcount , * rowcount ;
static char * funcname = " wrapbmp_flush " ;
// char filename[256];
if ( wrapbmp - > width < = 0 ) {
if ( use_bgi = = 1 & & beginning_gap_internal > 0 )
dst_add_gap_src_pixels ( " wrapbmp_bgi0 " , masterinfo ,
beginning_gap_internal ) ;
beginning_gap_internal = - 1 ;
last_h5050_internal = - 1 ;
if ( use_bgi )
just_flushed_internal = 1 ;
return ;
}
# ifdef WILLUSDEBUG
printf ( " @wrapbmp_flush() \n " ) ;
# endif
/*
{
char filename [ 256 ] ;
int i ;
static int bcount = 0 ;
for ( i = 0 ; i < wrapbmp - > height ; i + + )
{
unsigned char * p ;
int j ;
p = bmp_rowptr_from_top ( wrapbmp , i ) ;
for ( j = 0 ; j < wrapbmp - > width ; j + + )
if ( p [ j ] > 240 )
p [ j ] = 192 ;
}
sprintf ( filename , " out%05d.png " , bcount + + ) ;
bmp_write ( wrapbmp , filename , stdout , 100 ) ;
}
*/
colcount = rowcount = NULL ;
willus_dmem_alloc_warn ( 19 , ( void * * ) & colcount ,
( wrapbmp - > width + 16 ) * sizeof ( int ) , funcname , 10 ) ;
willus_dmem_alloc_warn ( 20 , ( void * * ) & rowcount ,
( wrapbmp - > height + 16 ) * sizeof ( int ) , funcname , 10 ) ;
region . c1 = 0 ;
region . c2 = wrapbmp - > width - 1 ;
region . r1 = 0 ;
region . r2 = wrapbmp - > height - 1 ;
region . rowbase = wrapbmp_base ;
region . bmp = wrapbmp ;
region . bgcolor = wrapbmp_bgcolor ;
# ifdef WILLUSDEBUG
printf ( " Bitmap is %d x %d (baseline=%d) \n " , wrapbmp - > width , wrapbmp - > height , wrapbmp_base ) ;
# endif
/* Sanity check on row spacing -- don't let it be too large. */
nomss = wrapbmp_rhmax * 1.7 ; /* Nominal single-spaced height for this row */
if ( last_rowbase_internal < 0 )
dh = 0 ;
else {
dh = ( int ) ( wrapbmp_line_spacing - last_rowbase_internal
- 1.2 * fabs ( vertical_line_spacing ) * nomss + .5 ) ;
if ( vertical_line_spacing < 0. ) {
int dh1 ;
if ( wrapbmp_maxgap > 0 )
dh1 = region . rowbase + 1 - wrapbmp_rhmax - wrapbmp_maxgap ;
else
dh1 = ( int ) ( wrapbmp_line_spacing - last_rowbase_internal
- 1.2 * nomss + .5 ) ;
if ( dh1 > dh )
dh = dh1 ;
}
}
if ( dh > 0 ) {
# ifdef WILLUSDEBUG
aprintf ( ANSI_YELLOW " dh > 0 = %d " ANSI_NORMAL " \n " , dh ) ;
printf ( " wrapbmp_line_spacing=%d \n " , wrapbmp_line_spacing ) ;
printf ( " nomss = %d \n " , nomss ) ;
printf ( " vls = %g \n " , vertical_line_spacing ) ;
printf ( " lrbi=%d \n " , last_rowbase_internal ) ;
printf ( " wrapbmp_maxgap=%d \n " , wrapbmp_maxgap ) ;
printf ( " wrapbmp_rhmax=%d \n " , wrapbmp_rhmax ) ;
# endif
region . r1 = dh ;
/*
if ( dh > 200 )
{
bmp_write ( wrapbmp , " out.png " , stdout , 100 ) ;
exit ( 10 ) ;
}
*/
}
if ( wrapbmp - > bpp = = 24 ) {
bmp8 = & _bmp8 ;
bmp_init ( bmp8 ) ;
bmp_convert_to_greyscale_ex ( bmp8 , wrapbmp ) ;
region . bmp8 = bmp8 ;
} else
region . bmp8 = wrapbmp ;
if ( gap_override_internal > 0 ) {
region . r1 = wrapbmp_base - wrapbmp_rhmax + 1 ;
if ( region . r1 < 0 )
region . r1 = 0 ;
if ( region . r1 > wrapbmp_base )
region . r1 = wrapbmp_base ;
gap = gap_override_internal ;
gap_override_internal = - 1 ;
} else {
if ( wrapbmp_height_extended )
gap = wrapbmp_gap ;
else
gap = 0 ;
}
# ifdef WILLUSDEBUG
printf ( " wf: gap=%d \n " , gap ) ;
# endif
if ( gap > 0 )
dst_add_gap_src_pixels ( " wrapbmp " , masterinfo , gap ) ;
if ( ! allow_full_justification )
just = ( wrapbmp_just & 0xcf ) | 0x20 ;
else
just = wrapbmp_just ;
bmpregion_add ( & region , NULL , masterinfo , 0 , 0 , 0 , - 1.0 , just , 2 , colcount ,
rowcount , pageinfo , 0xf , wrapbmp - > height - 1 - wrapbmp_base ) ;
if ( wrapbmp - > bpp = = 24 )
bmp_free ( bmp8 ) ;
willus_dmem_free ( 20 , ( double * * ) & rowcount , funcname ) ;
willus_dmem_free ( 19 , ( double * * ) & colcount , funcname ) ;
wrapbmp - > width = 0 ;
wrapbmp - > height = 0 ;
wrapbmp_line_spacing = - 1 ;
wrapbmp_gap = - 1 ;
wrapbmp_rhmax = - 1 ;
wrapbmp_thmax = - 1 ;
wrapbmp_hyphen . ch = - 1 ;
if ( use_bgi = = 1 & & beginning_gap_internal > 0 )
dst_add_gap_src_pixels ( " wrapbmp_bgi1 " , masterinfo ,
beginning_gap_internal ) ;
beginning_gap_internal = - 1 ;
last_h5050_internal = - 1 ;
if ( use_bgi )
just_flushed_internal = 1 ;
}
static void wrapbmp_hyphen_erase ( void )
{
WILLUSBITMAP * bmp , _bmp ;
int bw , bpp , c0 , c1 , c2 , i ;
if ( wrapbmp_hyphen . ch < 0 )
return ;
# if (WILLUSDEBUGX & 16)
printf ( " @hyphen_erase, bmp=%d x %d x %d \n " , wrapbmp - > width , wrapbmp - > height , wrapbmp - > bpp ) ;
printf ( " ch=%d, c2=%d, r1=%d, r2=%d \n " , wrapbmp_hyphen . ch , wrapbmp_hyphen . c2 , wrapbmp_hyphen . r1 , wrapbmp_hyphen . r2 ) ;
# endif
bmp = & _bmp ;
bmp_init ( bmp ) ;
bmp - > bpp = wrapbmp - > bpp ;
if ( bmp - > bpp = = 8 )
for ( i = 0 ; i < 256 ; i + + )
bmp - > red [ i ] = bmp - > blue [ i ] = bmp - > green [ i ] = i ;
bmp - > height = wrapbmp - > height ;
if ( src_left_to_right ) {
bmp - > width = wrapbmp_hyphen . c2 + 1 ;
c0 = 0 ;
c1 = wrapbmp_hyphen . ch ;
c2 = bmp - > width - 1 ;
} else {
bmp - > width = wrapbmp - > width - wrapbmp_hyphen . c2 ;
c0 = wrapbmp_hyphen . c2 ;
c1 = 0 ;
c2 = wrapbmp_hyphen . ch - wrapbmp_hyphen . c2 ;
}
bmp_alloc ( bmp ) ;
bpp = bmp - > bpp = = 24 ? 3 : 1 ;
bw = bpp * bmp - > width ;
for ( i = 0 ; i < bmp - > height ; i + + )
memcpy ( bmp_rowptr_from_top ( bmp , i ) ,
bmp_rowptr_from_top ( wrapbmp , i ) + bpp * c0 , bw ) ;
bw = ( c2 - c1 + 1 ) * bpp ;
if ( bw > 0 )
for ( i = wrapbmp_hyphen . r1 ; i < = wrapbmp_hyphen . r2 ; i + + )
memset ( bmp_rowptr_from_top ( bmp , i ) + bpp * c1 , 255 , bw ) ;
# if (WILLUSDEBUGX & 16)
{
static int count = 1 ;
char filename [ 256 ] ;
sprintf ( filename , " be%04d.png " , count ) ;
bmp_write ( wrapbmp , filename , stdout , 100 ) ;
sprintf ( filename , " ae%04d.png " , count ) ;
bmp_write ( bmp , filename , stdout , 100 ) ;
count + + ;
}
# endif
bmp_copy ( wrapbmp , bmp ) ;
bmp_free ( bmp ) ;
}
/*
* * src is only allocated if dst_color ! = 0
*/
static void white_margins ( WILLUSBITMAP * src , WILLUSBITMAP * srcgrey )
{
int i , n ;
BMPREGION * region , _region ;
region = & _region ;
region - > bmp = srcgrey ;
get_white_margins ( region ) ;
n = region - > c1 ;
for ( i = 0 ; i < srcgrey - > height ; i + + ) {
unsigned char * p ;
if ( dst_color ) {
p = bmp_rowptr_from_top ( src , i ) ;
memset ( p , 255 , n * 3 ) ;
}
p = bmp_rowptr_from_top ( srcgrey , i ) ;
memset ( p , 255 , n ) ;
}
n = srcgrey - > width - 1 - region - > c2 ;
for ( i = 0 ; i < srcgrey - > height ; i + + ) {
unsigned char * p ;
if ( dst_color ) {
p = bmp_rowptr_from_top ( src , i ) + 3 * ( src - > width - n ) ;
memset ( p , 255 , n * 3 ) ;
}
p = bmp_rowptr_from_top ( srcgrey , i ) + srcgrey - > width - n ;
memset ( p , 255 , n ) ;
}
n = region - > r1 ;
for ( i = 0 ; i < n ; i + + ) {
unsigned char * p ;
if ( dst_color ) {
p = bmp_rowptr_from_top ( src , i ) ;
memset ( p , 255 , src - > width * 3 ) ;
}
p = bmp_rowptr_from_top ( srcgrey , i ) ;
memset ( p , 255 , srcgrey - > width ) ;
}
n = srcgrey - > height - 1 - region - > r2 ;
for ( i = srcgrey - > height - n ; i < srcgrey - > height ; i + + ) {
unsigned char * p ;
if ( dst_color ) {
p = bmp_rowptr_from_top ( src , i ) ;
memset ( p , 255 , src - > width * 3 ) ;
}
p = bmp_rowptr_from_top ( srcgrey , i ) ;
memset ( p , 255 , srcgrey - > width ) ;
}
}
static void get_white_margins ( BMPREGION * region )
{
int n ;
double defval ;
defval = 0.25 ;
if ( mar_left < 0. )
mar_left = defval ;
n = ( int ) ( 0.5 + mar_left * src_dpi ) ;
if ( n > region - > bmp - > width )
n = region - > bmp - > width ;
region - > c1 = n ;
if ( mar_right < 0. )
mar_right = defval ;
n = ( int ) ( 0.5 + mar_right * src_dpi ) ;
if ( n > region - > bmp - > width )
n = region - > bmp - > width ;
region - > c2 = region - > bmp - > width - 1 - n ;
if ( mar_top < 0. )
mar_top = defval ;
n = ( int ) ( 0.5 + mar_top * src_dpi ) ;
if ( n > region - > bmp - > height )
n = region - > bmp - > height ;
region - > r1 = n ;
if ( mar_bot < 0. )
mar_bot = defval ;
n = ( int ) ( 0.5 + mar_bot * src_dpi ) ;
if ( n > region - > bmp - > height )
n = region - > bmp - > height ;
region - > r2 = region - > bmp - > height - 1 - n ;
}
/*
* * bitmap_orientation ( )
* *
* * 1.0 means neutral
* *
* * > > 1.0 means document is likely portrait ( no rotation necessary )
* * ( max is 100. )
* *
* * < < 1.0 means document is likely landscape ( need to rotate it )
* * ( min is 0.01 )
* *
*/
static double bitmap_orientation ( WILLUSBITMAP * bmp )
{
int i , ic , wtcalc ;
double hsum , vsum , rat ;
wtcalc = - 1 ;
for ( vsum = 0. , hsum = 0. , ic = 0 , i = 20 ; i < = 85 ; i + = 5 , ic + + ) {
double nv , nh ;
int wth , wtv ;
# ifdef DEBUG
printf ( " h %d: \n " , i ) ;
# endif
if ( ic = = 0 )
wth = - 1 ;
else
wth = wtcalc ;
wth = - 1 ;
nh = bmp_inflections_horizontal ( bmp , 8 , i , & wth ) ;
# ifdef DEBUG
{
FILE * f ;
f = fopen ( " inf.ep " , " a " ) ;
fprintf ( f , " /ag \n " ) ;
fclose ( f ) ;
}
printf ( " v %d: \n " , i ) ;
# endif
if ( ic = = 0 )
wtv = - 1 ;
else
wtv = wtcalc ;
wtv = - 1 ;
nv = bmp_inflections_vertical ( bmp , 8 , i , & wtv ) ;
if ( ic = = 0 ) {
if ( wtv > wth )
wtcalc = wtv ;
else
wtcalc = wth ;
continue ;
}
// exit(10);
hsum + = nh * i * i * i ;
vsum + = nv * i * i * i ;
}
if ( vsum = = 0. & & hsum = = 0. )
rat = 1.0 ;
else if ( hsum < vsum & & hsum / vsum < .01 )
rat = 100. ;
else
rat = vsum / hsum ;
if ( rat < .01 )
rat = .01 ;
// printf(" page %2d: %8.4f\n",pagenum,rat);
// fprintf(out,"\t%8.4f",vsum/hsum);
// fprintf(out,"\n");
return ( rat ) ;
}
static double bmp_inflections_vertical ( WILLUSBITMAP * srcgrey , int ndivisions ,
int delta , int * wthresh )
{
int y0 , y1 , ny , i , nw , nisum , ni , wt , wtmax ;
double * g ;
char * funcname = " bmp_inflections_vertical " ;
nw = srcgrey - > width / ndivisions ;
y0 = srcgrey - > height / 6 ;
y1 = srcgrey - > height - y0 ;
ny = y1 - y0 ;
willus_dmem_alloc_warn ( 21 , ( void * * ) & g , ny * sizeof ( double ) , funcname , 10 ) ;
wtmax = - 1 ;
for ( nisum = 0 , i = 0 ; i < 10 ; i + + ) {
int x0 , x1 , nx , j ;
x0 = ( srcgrey - > width - nw ) * ( i + 2 ) / 13 ;
x1 = x0 + nw ;
if ( x1 > srcgrey - > width )
x1 = srcgrey - > width ;
nx = x1 - x0 ;
for ( j = y0 ; j < y1 ; j + + ) {
int k , rsum ;
unsigned char * p ;
p = bmp_rowptr_from_top ( srcgrey , j ) + x0 ;
for ( rsum = k = 0 ; k < nx ; k + + , p + + )
rsum + = p [ 0 ] ;
g [ j - y0 ] = ( double ) rsum / nx ;
}
wt = ( * wthresh ) ;
ni = inflection_count ( g , ny , delta , & wt ) ;
if ( ( * wthresh ) < 0 & & ni > = 3 & & wt > wtmax )
wtmax = wt ;
if ( ni > nisum )
nisum = ni ;
}
willus_dmem_free ( 21 , & g , funcname ) ;
if ( ( * wthresh ) < 0 )
( * wthresh ) = wtmax ;
return ( nisum ) ;
}
static double bmp_inflections_horizontal ( WILLUSBITMAP * srcgrey , int ndivisions ,
int delta , int * wthresh )
{
int x0 , x1 , nx , bw , i , nh , nisum , ni , wt , wtmax ;
double * g ;
char * funcname = " bmp_inflections_vertical " ;
nh = srcgrey - > height / ndivisions ;
x0 = srcgrey - > width / 6 ;
x1 = srcgrey - > width - x0 ;
nx = x1 - x0 ;
bw = bmp_bytewidth ( srcgrey ) ;
willus_dmem_alloc_warn ( 22 , ( void * * ) & g , nx * sizeof ( double ) , funcname , 10 ) ;
wtmax = - 1 ;
for ( nisum = 0 , i = 0 ; i < 10 ; i + + ) {
int y0 , y1 , ny , j ;
y0 = ( srcgrey - > height - nh ) * ( i + 2 ) / 13 ;
y1 = y0 + nh ;
if ( y1 > srcgrey - > height )
y1 = srcgrey - > height ;
ny = y1 - y0 ;
for ( j = x0 ; j < x1 ; j + + ) {
int k , rsum ;
unsigned char * p ;
p = bmp_rowptr_from_top ( srcgrey , y0 ) + j ;
for ( rsum = k = 0 ; k < ny ; k + + , p + = bw )
rsum + = p [ 0 ] ;
g [ j - x0 ] = ( double ) rsum / ny ;
}
wt = ( * wthresh ) ;
ni = inflection_count ( g , nx , delta , & wt ) ;
if ( ( * wthresh ) < 0 & & ni > = 3 & & wt > wtmax )
wtmax = wt ;
if ( ni > nisum )
nisum = ni ;
}
willus_dmem_free ( 22 , & g , funcname ) ;
if ( ( * wthresh ) < 0 )
( * wthresh ) = wtmax ;
return ( nisum ) ;
}
static int inflection_count ( double * x , int n , int delta , int * wthresh )
{
int i , i0 , ni , ww , c , ct , wt , mode ;
double meandi , meandisq , f1 , f2 , stdev ;
double * xs ;
static int hist [ 256 ] ;
static char * funcname = " inflection_count " ;
/* Find threshold white value that peaks must exceed */
if ( ( * wthresh ) < 0 ) {
for ( i = 0 ; i < 256 ; i + + )
hist [ i ] = 0 ;
for ( i = 0 ; i < n ; i + + ) {
i0 = floor ( x [ i ] ) ;
if ( i0 > 255 )
i0 = 255 ;
hist [ i0 ] + + ;
}
ct = n * .15 ;
for ( c = 0 , i = 255 ; i > = 0 ; i - - ) {
c + = hist [ i ] ;
if ( c > ct )
break ;
}
wt = i - 10 ;
if ( wt < 192 )
wt = 192 ;
# ifdef DEBUG
printf ( " wt=%d \n " , wt ) ;
# endif
( * wthresh ) = wt ;
} else
wt = ( * wthresh ) ;
ww = n / 150 ;
if ( ww < 1 )
ww = 1 ;
willus_dmem_alloc_warn ( 23 , ( void * * ) & xs , sizeof ( double ) * n , funcname , 10 ) ;
for ( i = 0 ; i < n - ww ; i + + ) {
int j ;
for ( xs [ i ] = 0. , j = 0 ; j < ww ; j + + , xs [ i ] + = x [ i + j ] )
;
xs [ i ] / = ww ;
}
meandi = meandisq = 0. ;
if ( xs [ 0 ] < = wt - delta )
mode = 1 ;
else if ( xs [ 0 ] > = wt )
mode = - 1 ;
else
mode = 0 ;
for ( i0 = 0 , ni = 0 , i = 1 ; i < n - ww ; i + + ) {
if ( mode = = 1 & & xs [ i ] > = wt ) {
if ( i0 > 0 ) {
meandi + = i - i0 ;
meandisq + = ( i - i0 ) * ( i - i0 ) ;
ni + + ;
}
i0 = i ;
mode = - 1 ;
continue ;
}
if ( xs [ i ] < = wt - delta )
mode = 1 ;
}
stdev = 1.0 ; /* Avoid compiler warning */
if ( ni > 0 ) {
meandi / = ni ;
meandisq / = ni ;
stdev = sqrt ( fabs ( meandi * meandi - meandisq ) ) ;
}
f1 = meandi / n ;
if ( f1 > .15 )
f1 = .15 ;
if ( ni > 2 ) {
if ( stdev / meandi < .05 )
f2 = 20. ;
else
f2 = meandi / stdev ;
} else
f2 = 1. ;
# ifdef DEBUG
printf ( " ni=%3d, f1=%8.4f, f2=%8.4f, f1*f2*ni=%8.4f \n " , ni , f1 , f2 , f1 * f2 * ni ) ;
{
static int count = 0 ;
FILE * f ;
int i ;
f = fopen ( " inf.ep " , count = = 0 ? " w " : " a " ) ;
count + + ;
fprintf ( f , " /sa l \" %d \" 1 \n " , ni ) ;
for ( i = 0 ; i < n - ww ; i + + )
fprintf ( f , " %g \n " , xs [ i ] ) ;
fprintf ( f , " //nc \n " ) ;
fclose ( f ) ;
}
# endif /* DEBUG */
willus_dmem_free ( 23 , & xs , funcname ) ;
return ( f1 * f2 * ni ) ;
}
static void pdfboxes_init ( PDFBOXES * boxes )
{
boxes - > n = boxes - > na = 0 ;
boxes - > box = NULL ;
}
static void pdfboxes_free ( PDFBOXES * boxes )
{
static char * funcname = " pdfboxes_free " ;
willus_dmem_free ( 24 , ( double * * ) & boxes - > box , funcname ) ;
}
# ifdef COMMENT
static void pdfboxes_add_box ( PDFBOXES * boxes , PDFBOX * box )
{
static char * funcname = " pdfboxes_add_box " ;
if ( boxes - > n > = boxes - > na )
{
int newsize ;
newsize = boxes - > na < 1024 ? 2048 : boxes - > na * 2 ;
/* Just calls willus_mem_alloc if oldsize==0 */
willus_mem_realloc_robust_warn ( ( void * * ) & boxes - > box , newsize * sizeof ( PDFBOX ) ,
boxes - > na * sizeof ( PDFBOX ) , funcname , 10 ) ;
boxes - > na = newsize ;
}
boxes - > box [ boxes - > n + + ] = ( * box ) ;
}
static void pdfboxes_delete ( PDFBOXES * boxes , int n )
{
if ( n > 0 & & n < boxes - > n )
{
int i ;
for ( i = 0 ; i < boxes - > n - n ; i + + )
boxes - > box [ i ] = boxes - > box [ i + n ] ;
}
boxes - > n - = n ;
if ( boxes - > n < 0 )
boxes - > n = 0 ;
}
# endif
/*
* * Track gaps between words so that we can tell when one is out of family .
* * lcheight = height of a lowercase letter .
*/
static void word_gaps_add ( BREAKINFO * breakinfo , int lcheight ,
double * median_gap )
{
static int nn = 0 ;
static double gap [ 1024 ] ;
static char * funcname = " word_gaps_add " ;
if ( breakinfo ! = NULL & & breakinfo - > n > 1 ) {
int i ;
for ( i = 0 ; i < breakinfo - > n - 1 ; i + + ) {
double g ;
g = ( double ) breakinfo - > textrow [ i ] . gap / lcheight ;
if ( g > = word_spacing ) {
gap [ nn & 0x3ff ] = g ;
nn + + ;
}
}
}
if ( median_gap ! = NULL ) {
if ( nn > 0 ) {
int n ;
static double * gap_sorted ;
n = ( nn > 1024 ) ? 1024 : nn ;
willus_dmem_alloc_warn ( 28 , ( void * * ) & gap_sorted ,
sizeof ( double ) * n , funcname , 10 ) ;
memcpy ( gap_sorted , gap , n * sizeof ( double ) ) ;
sortd ( gap_sorted , n ) ;
( * median_gap ) = gap_sorted [ n / 2 ] ;
willus_dmem_free ( 28 , & gap_sorted , funcname ) ;
} else
( * median_gap ) = 0.7 ;
}
}
/*
* * bmp must be grayscale ! ( cbmp = color , can be null )
*/
static void bmp_detect_vertical_lines ( WILLUSBITMAP * bmp , WILLUSBITMAP * cbmp ,
double dpi , double minwidth_in , double maxwidth_in , double minheight_in ,
double anglemax_deg , int white_thresh )
{
int tc , iangle , irow , icol ;
int rowstep , na , angle_sign , ccthresh ;
int pixmin , halfwidth , bytewidth ;
int bs1 , nrsteps , dp ;
double anglestep ;
WILLUSBITMAP * tmp , _tmp ;
unsigned char * p0 ;
if ( debug )
printf ( " At bmp_detect_vertical_lines... \n " ) ;
if ( ! bmp_is_grayscale ( bmp ) ) {
printf (
" Internal error. bmp_detect_vertical_lines passed a non-grayscale bitmap. \n " ) ;
exit ( 10 ) ;
}
tmp = & _tmp ;
bmp_init ( tmp ) ;
bmp_copy ( tmp , bmp ) ;
dp = bmp_rowptr_from_top ( tmp , 0 ) - bmp_rowptr_from_top ( bmp , 0 ) ;
bytewidth = bmp_bytewidth ( bmp ) ;
pixmin = ( int ) ( minwidth_in * dpi + .5 ) ;
if ( pixmin < 1 )
pixmin = 1 ;
halfwidth = pixmin / 4 ;
if ( halfwidth < 1 )
halfwidth = 1 ;
anglestep = atan2 ( ( double ) halfwidth / dpi , minheight_in ) ;
na = ( int ) ( ( anglemax_deg * PI / 180. ) / anglestep + .5 ) ;
if ( na < 1 )
na = 1 ;
rowstep = ( int ) ( dpi / 40. + .5 ) ;
if ( rowstep < 2 )
rowstep = 2 ;
nrsteps = bmp - > height / rowstep ;
bs1 = bytewidth * rowstep ;
ccthresh = ( int ) ( minheight_in * dpi / rowstep + .5 ) ;
if ( ccthresh < 2 )
ccthresh = 2 ;
if ( debug & & verbose )
printf (
" na = %d, rowstep = %d, ccthresh = %d, white_thresh = %d, nrsteps=%d \n " ,
na , rowstep , ccthresh , white_thresh , nrsteps ) ;
/*
bmp_write ( bmp , " out.png " , stdout , 97 ) ;
wfile_written_info ( " out.png " , stdout ) ;
*/
p0 = bmp_rowptr_from_top ( bmp , 0 ) ;
for ( tc = 0 ; tc < 100 ; tc + + ) {
int ccmax , ic0max , ir0max ;
double tanthmax ;
ccmax = - 1 ;
ic0max = ir0max = 0 ;
tanthmax = 0. ;
for ( iangle = 0 ; iangle < = na ; iangle + + ) {
for ( angle_sign = 1 ; angle_sign > = - 1 ; angle_sign - = 2 ) {
double th , tanth , tanthx ;
int ic1 , ic2 ;
if ( iangle = = 0 & & angle_sign = = - 1 )
continue ;
th = ( PI / 180. ) * iangle * angle_sign * fabs ( anglemax_deg )
/ na ;
tanth = tan ( th ) ;
tanthx = tanth * rowstep ;
if ( angle_sign = = 1 ) {
ic1 = - ( int ) ( bmp - > height * tanth + 1. ) ;
ic2 = bmp - > width - 1 ;
} else {
ic1 = ( int ) ( - bmp - > height * tanth + 1. ) ;
ic2 = bmp - > width - 1 + ( int ) ( - bmp - > height * tanth + 1. ) ;
}
// printf("iangle=%2d, angle_sign=%2d, ic1=%4d, ic2=%4d\n",iangle,angle_sign,ic1,ic2);
for ( icol = ic1 ; icol < = ic2 ; icol + + ) {
unsigned char * p ;
int cc , ic0 , ir0 ;
p = p0 ;
if ( icol < 0 | | icol > bmp - > width - 1 )
for ( irow = 0 ; irow < nrsteps ; irow + + , p + = bs1 ) {
int ic ;
ic = icol + irow * tanthx ;
if ( ic > = 0 & & ic < bmp - > width )
break ;
}
else
irow = 0 ;
for ( ir0 = ic0 = cc = 0 ; irow < nrsteps ; irow + + , p + = bs1 ) {
int ic ;
ic = icol + irow * tanthx ;
if ( ic < 0 | | ic > = bmp - > width )
break ;
if ( ( p [ ic ] < white_thresh
| | p [ ic + bytewidth ] < white_thresh )
& & ( p [ ic + dp ] < white_thresh
| | p [ ic + bytewidth + dp ] < white_thresh ) ) {
if ( cc = = 0 ) {
ic0 = ic ;
ir0 = irow * rowstep ;
}
cc + + ;
if ( cc > ccmax ) {
ccmax = cc ;
tanthmax = tanth ;
ic0max = ic0 ;
ir0max = ir0 ;
}
} else
cc = 0 ;
}
}
}
}
if ( ccmax < ccthresh )
break ;
if ( debug )
printf (
" Vert line detected: ccmax=%d (pix=%d), tanthmax=%g, ic0max=%d, ir0max=%d \n " ,
ccmax , ccmax * rowstep , tanthmax , ic0max , ir0max ) ;
if ( ! vert_line_erase ( bmp , cbmp , tmp , ir0max , ic0max , tanthmax ,
minheight_in , minwidth_in , maxwidth_in , white_thresh ) )
break ;
}
/*
bmp_write ( tmp , " outt.png " , stdout , 95 ) ;
wfile_written_info ( " outt.png " , stdout ) ;
bmp_write ( bmp , " out2.png " , stdout , 95 ) ;
wfile_written_info ( " out2.png " , stdout ) ;
exit ( 10 ) ;
*/
}
/*
* * Calculate max vert line length . Line is terminated by nw consecutive white pixels
* * on either side .
*/
static int vert_line_erase ( WILLUSBITMAP * bmp , WILLUSBITMAP * cbmp ,
WILLUSBITMAP * tmp , int row0 , int col0 , double tanth ,
double minheight_in , double minwidth_in , double maxwidth_in ,
int white_thresh )
{
int lw , cc , maxdev , nw , dir , i , n ;
int * c1 , * c2 , * w ;
static char * funcname = " vert_line_erase " ;
willus_dmem_alloc_warn ( 26 , ( void * * ) & c1 , sizeof ( int ) * 3 * bmp - > height ,
funcname , 10 ) ;
c2 = & c1 [ bmp - > height ] ;
w = & c2 [ bmp - > height ] ;
/*
maxdev = ( int ) ( ( double ) bmp - > height / minheight_in + .5 ) ;
if ( maxdev < 3 )
maxdev = 3 ;
*/
nw = ( int ) ( ( double ) src_dpi / 100. + .5 ) ;
if ( nw < 2 )
nw = 2 ;
maxdev = nw ;
for ( i = 0 ; i < bmp - > height ; i + + )
c1 [ i ] = c2 [ i ] = - 1 ;
n = 0 ;
for ( dir = - 1 ; dir < = 1 ; dir + = 2 ) {
int del , brc ;
brc = 0 ;
for ( del = ( dir = = - 1 ) ? 0 : 1 ; 1 ; del + + ) {
int r , c ;
unsigned char * p ;
r = row0 + dir * del ;
if ( r < 0 | | r > bmp - > height - 1 )
break ;
c = col0 + ( r - row0 ) * tanth ;
if ( c < 0 | | c > bmp - > width - 1 )
break ;
p = bmp_rowptr_from_top ( bmp , r ) ;
for ( i = c ; i < = c + maxdev & & i < bmp - > width ; i + + )
if ( p [ i ] < white_thresh )
break ;
if ( i > c + maxdev | | i > = bmp - > width ) {
for ( i = c - 1 ; i > = c - maxdev & & i > = 0 ; i - - )
if ( p [ i ] < white_thresh )
break ;
if ( i < c - maxdev | | i < 0 ) {
brc + + ;
if ( brc > = nw )
break ;
continue ;
}
}
brc = 0 ;
for ( c = i , cc = 0 ; i < bmp - > width ; i + + )
if ( p [ i ] < white_thresh )
cc = 0 ;
else {
cc + + ;
if ( cc > = nw )
break ;
}
c2 [ r ] = i - cc ;
if ( c2 [ r ] > bmp - > width - 1 )
c2 [ r ] = bmp - > width - 1 ;
for ( cc = 0 , i = c ; i > = 0 ; i - - )
if ( p [ i ] < white_thresh )
cc = 0 ;
else {
cc + + ;
if ( cc > = nw )
break ;
}
c1 [ r ] = i + cc ;
if ( c1 [ r ] < 0 )
c1 [ r ] = 0 ;
w [ n + + ] = c2 [ r ] - c1 [ r ] + 1 ;
c1 [ r ] - = cc ;
if ( c1 [ r ] < 0 )
c1 [ r ] = 0 ;
c2 [ r ] + = cc ;
if ( c2 [ r ] > bmp - > width - 1 )
c2 [ r ] = bmp - > width - 1 ;
}
}
if ( n > 1 )
sorti ( w , n ) ;
if ( n < 10 | | n < minheight_in * src_dpi | | w [ n / 4 ] < minwidth_in * src_dpi
| | w [ 3 * n / 4 ] > maxwidth_in * src_dpi
| | ( erase_vertical_lines = = 1 & & w [ n - 1 ] > maxwidth_in * src_dpi ) ) {
/* Erase area in temp bitmap */
for ( i = 0 ; i < bmp - > height ; i + + ) {
unsigned char * p ;
int cmax ;
if ( c1 [ i ] < 0 | | c2 [ i ] < 0 )
continue ;
cmax = ( c2 [ i ] - c1 [ i ] ) + 1 ;
p = bmp_rowptr_from_top ( tmp , i ) + c1 [ i ] ;
for ( ; cmax > 0 ; cmax - - , p + + )
( * p ) = 255 ;
}
} else {
/* Erase line width in source bitmap */
lw = w [ 3 * n / 4 ] + nw * 2 ;
if ( lw > maxwidth_in * src_dpi / 2 )
lw = maxwidth_in * src_dpi / 2 ;
for ( i = 0 ; i < bmp - > height ; i + + ) {
unsigned char * p ;
int c0 , cmin , cmax , count , white ;
if ( c1 [ i ] < 0 | | c2 [ i ] < 0 )
continue ;
c0 = col0 + ( i - row0 ) * tanth ;
cmin = c0 - lw - 1 ;
if ( cmin < c1 [ i ] )
cmin = c1 [ i ] ;
cmax = c0 + lw + 1 ;
if ( cmax > c2 [ i ] )
cmax = c2 [ i ] ;
p = bmp_rowptr_from_top ( bmp , i ) ;
c0 = ( p [ cmin ] > p [ cmax ] ) ? cmin : cmax ;
white = p [ c0 ] ;
if ( white < = white_thresh )
white = white_thresh + 1 ;
if ( white > 255 )
white = 255 ;
count = ( cmax - cmin ) + 1 ;
p = & p [ cmin ] ;
for ( ; count > 0 ; count - - , p + + )
( * p ) = white ;
if ( cbmp ! = NULL ) {
unsigned char * p0 ;
p = bmp_rowptr_from_top ( cbmp , i ) ;
p0 = p + c0 * 3 ;
p = p + cmin * 3 ;
count = ( cmax - cmin ) + 1 ;
for ( ; count > 0 ; count - - , p + = 3 ) {
p [ 0 ] = p0 [ 0 ] ;
p [ 1 ] = p0 [ 1 ] ;
p [ 2 ] = p0 [ 2 ] ;
}
}
}
}
willus_dmem_free ( 26 , ( double * * ) & c1 , funcname ) ;
return ( 1 ) ;
}
/*
* * mem_index . . . controls which memory allocactions get a protective margin
* * around them .
*/
static int mem_index_min = 999 ;
static int mem_index_max = 999 ;
static void willus_dmem_alloc_warn ( int index , void * * ptr , int size ,
char * funcname , int exitcode )
{
if ( index > = mem_index_min & & index < = mem_index_max ) {
char * ptr1 ;
void * x ;
willus_mem_alloc_warn ( ( void * * ) & ptr1 , size + 2048 , funcname , exitcode ) ;
ptr1 + = 1024 ;
x = ( void * ) ptr1 ;
( * ptr ) = x ;
} else
willus_mem_alloc_warn ( ptr , size , funcname , exitcode ) ;
}
static void willus_dmem_free ( int index , double * * ptr , char * funcname )
{
if ( ( * ptr ) = = NULL )
return ;
if ( index > = mem_index_min & & index < = mem_index_max ) {
double * x ;
char * ptr1 ;
x = ( * ptr ) ;
ptr1 = ( char * ) x ;
ptr1 - = 1024 ;
x = ( double * ) ptr1 ;
willus_mem_free ( & x , funcname ) ;
( * ptr ) = NULL ;
} else
willus_mem_free ( ptr , funcname ) ;
}
/* mem.c */
/*
* * The reason I don ' t simply use malloc is because I want to allocate
* * memory using type long instead of type size_t . On some compilers ,
* * like gcc , these are the same , so it doesn ' t matter . On other
* * compilers , like Turbo C , these are different .
* *
*/
static int willus_mem_alloc ( double * * ptr , long size , char * name )
{
# if (defined(WIN32) && !defined(__DMC__))
unsigned long memsize ;
memsize = ( unsigned long ) size ;
# ifdef USEGLOBAL
( * ptr ) = ( memsize = = size ) ? ( double * ) GlobalAlloc ( GPTR , memsize ) : NULL ;
# else
( * ptr ) = ( memsize = = size ) ? ( double * ) CoTaskMemAlloc ( memsize ) : NULL ;
# endif
# else
size_t memsize ;
memsize = ( size_t ) size ;
( * ptr ) = ( memsize = = size ) ? ( double * ) malloc ( memsize ) : NULL ;
# endif
/*
{
f = fopen ( " mem.dat " , " a " ) ;
fprintf ( f , " willus_mem_alloc(%d,%s) \n " , size , name ) ;
fclose ( f ) ;
}
*/
return ( ( * ptr ) ! = NULL ) ;
}
/*
* * Prints an integer to ' s ' with commas separating every three digits .
* * E . g . 45 , 399 , 350
* * Correctly handles negative values .
*/
static void comma_print ( char * s , long size )
{
int i , m , neg ;
char tbuf [ 80 ] ;
if ( ! size )
{
s [ 0 ] = ' 0 ' ;
s [ 1 ] = ' \0 ' ;
return ;
}
s [ 0 ] = ' \0 ' ;
neg = 0 ;
if ( size < 0 )
{
size = - size ;
neg = 1 ;
}
for ( i = 0 , m = size % 1000 ; size ; i + + , size = ( size - m ) / 1000 , m = size % 1000 )
{
sprintf ( tbuf , m = = size ? " %d%s " : " %03d%s " , m , i > 0 ? " , " : " " ) ;
strcat ( tbuf , s ) ;
strcpy ( s , tbuf ) ;
}
if ( neg )
{
strcpy ( tbuf , " - " ) ;
strcat ( tbuf , s ) ;
strcpy ( s , tbuf ) ;
}
}
static void mem_warn ( char * name , int size , int exitcode )
{
static char buf [ 128 ] ;
aprintf ( " \n " ANSI_RED " \a Cannot allocate enough memory for "
" function %s. " ANSI_NORMAL " \n " , name ) ;
comma_print ( buf , size ) ;
aprintf ( " " ANSI_RED " (Needed %s bytes.) " ANSI_NORMAL " \n \n " , buf ) ;
if ( exitcode ! = 0 )
{
aprintf ( " " ANSI_RED " Program terminated. " ANSI_NORMAL " \n \n " ) ;
exit ( exitcode ) ;
}
}
static int willus_mem_alloc_warn ( void * * ptr , int size , char * name , int exitcode )
{
int status ;
status = willus_mem_alloc ( ( double * * ) ptr , ( long ) size , name ) ;
if ( ! status )
mem_warn ( name , size , exitcode ) ;
return ( status ) ;
}
static void willus_mem_free ( double * * ptr , char * name )
{
if ( ( * ptr ) ! = NULL ) {
# if (defined(WIN32) && !defined(__DMC__))
# ifdef USEGLOBAL
GlobalFree ( ( void * ) ( * ptr ) ) ;
# else
CoTaskMemFree ( ( void * ) ( * ptr ) ) ;
# endif
# else
free ( ( void * ) ( * ptr ) ) ;
# endif
( * ptr ) = NULL ;
}
}
static int willus_mem_realloc_robust ( double * * ptr , long newsize , long oldsize , char * name )
{
# if (defined(WIN32) && !defined(__DMC__))
unsigned long memsize ;
void * newptr ;
# else
size_t memsize ;
void * newptr ;
# endif
# if (defined(WIN32) && !defined(__DMC__))
memsize = ( unsigned long ) newsize ;
# else
memsize = ( size_t ) newsize ;
# endif
if ( memsize ! = newsize )
return ( 0 ) ;
if ( ( * ptr ) = = NULL | | oldsize < = 0 )
return ( willus_mem_alloc ( ptr , newsize , name ) ) ;
# if (defined(WIN32) && !defined(__DMC__))
# ifdef USEGLOBAL
newptr = ( void * ) GlobalReAlloc ( ( void * ) ( * ptr ) , memsize , GMEM_MOVEABLE ) ;
# else
newptr = ( void * ) CoTaskMemRealloc ( ( void * ) ( * ptr ) , memsize ) ;
# endif
# else
newptr = realloc ( ( void * ) ( * ptr ) , memsize ) ;
# endif
if ( newptr = = NULL & & willus_mem_alloc ( ( double * * ) & newptr , newsize , name ) )
{
memcpy ( newptr , ( * ptr ) , oldsize ) ;
willus_mem_free ( ptr , name ) ;
}
if ( newptr = = NULL )
return ( 0 ) ;
( * ptr ) = newptr ;
return ( 1 ) ;
}
static int willus_mem_realloc_robust_warn ( void * * ptr , int newsize , int oldsize , char * name ,
int exitcode )
{
int status ;
status = willus_mem_realloc_robust ( ( double * * ) ptr , newsize , oldsize , name ) ;
if ( ! status )
mem_warn ( name , newsize , exitcode ) ;
return ( status ) ;
}
/* math.c */
static void sortd ( double * x , int n )
{
int top , n1 ;
double x0 ;
if ( n < 2 )
return ;
top = n / 2 ;
n1 = n - 1 ;
while ( 1 ) {
if ( top > 0 ) {
top - - ;
x0 = x [ top ] ;
} else {
x0 = x [ n1 ] ;
x [ n1 ] = x [ 0 ] ;
n1 - - ;
if ( ! n1 ) {
x [ 0 ] = x0 ;
return ;
}
}
{
int parent , child ;
parent = top ;
child = top * 2 + 1 ;
while ( child < = n1 ) {
if ( child < n1 & & x [ child ] < x [ child + 1 ] )
child + + ;
if ( x0 < x [ child ] ) {
x [ parent ] = x [ child ] ;
parent = child ;
child + = ( parent + 1 ) ;
} else
break ;
}
x [ parent ] = x0 ;
}
}
}
static void sorti ( int * x , int n )
{
int top , n1 ;
int x0 ;
if ( n < 2 )
return ;
top = n / 2 ;
n1 = n - 1 ;
while ( 1 ) {
if ( top > 0 ) {
top - - ;
x0 = x [ top ] ;
} else {
x0 = x [ n1 ] ;
x [ n1 ] = x [ 0 ] ;
n1 - - ;
if ( ! n1 ) {
x [ 0 ] = x0 ;
return ;
}
}
{
int parent , child ;
parent = top ;
child = top * 2 + 1 ;
while ( child < = n1 ) {
if ( child < n1 & & x [ child ] < x [ child + 1 ] )
child + + ;
if ( x0 < x [ child ] ) {
x [ parent ] = x [ child ] ;
parent = child ;
child + = ( parent + 1 ) ;
} else
break ;
}
x [ parent ] = x0 ;
}
}
}
/* bmp.c */
/*
* * Should call bmp_set_type ( ) right after this to set the bitmap type .
*/
# define RGBSET24(bmp,ptr,r,g,b) \
if ( bmp - > type = = WILLUSBITMAP_TYPE_NATIVE ) \
{ \
ptr [ 0 ] = r ; \
ptr [ 1 ] = g ; \
ptr [ 2 ] = b ; \
} \
else \
{ \
ptr [ 2 ] = r ; \
ptr [ 1 ] = g ; \
ptr [ 0 ] = b ; \
}
# define RGBGET(bmp,ptr,r,g,b) \
if ( bmp - > bpp = = 8 ) \
{ \
r = bmp - > red [ ptr [ 0 ] ] ; \
g = bmp - > green [ ptr [ 0 ] ] ; \
b = bmp - > blue [ ptr [ 0 ] ] ; \
} \
else if ( bmp - > type = = WILLUSBITMAP_TYPE_NATIVE ) \
{ \
r = ptr [ 0 ] ; \
g = ptr [ 1 ] ; \
b = ptr [ 2 ] ; \
} \
else \
{ \
r = ptr [ 2 ] ; \
g = ptr [ 1 ] ; \
b = ptr [ 0 ] ; \
}
# define RGBGETINCPTR(bmp,ptr,r,g,b) \
if ( bmp - > bpp = = 8 ) \
{ \
r = bmp - > red [ ptr [ 0 ] ] ; \
g = bmp - > green [ ptr [ 0 ] ] ; \
b = bmp - > blue [ ptr [ 0 ] ] ; \
ptr + + ; \
} \
else if ( bmp - > type = = WILLUSBITMAP_TYPE_NATIVE ) \
{ \
r = ptr [ 0 ] ; \
g = ptr [ 1 ] ; \
b = ptr [ 2 ] ; \
ptr + = 3 ; \
} \
else \
{ \
r = ptr [ 2 ] ; \
g = ptr [ 1 ] ; \
b = ptr [ 0 ] ; \
ptr + = 3 ; \
}
static void bmp_init ( WILLUSBITMAP * bmap )
{
bmap - > data = NULL ;
bmap - > size_allocated = 0 ;
bmap - > type = WILLUSBITMAP_TYPE_NATIVE ;
}
static int bmp_bytewidth_win32 ( WILLUSBITMAP * bmp )
{
return ( ( ( bmp - > bpp = = 24 ? bmp - > width * 3 : bmp - > width ) + 3 ) & ( ~ 0x3 ) ) ;
}
/*
* * The width , height , and bpp parameters of the WILLUSBITMAP structure
* * should be set before calling this function .
*/
static int bmp_alloc ( WILLUSBITMAP * bmap )
{
int size ;
static char * funcname = " bmp_alloc " ;
if ( bmap - > bpp ! = 8 & & bmap - > bpp ! = 24 ) {
printf ( " Internal error: call to bmp_alloc has bpp!=8 and bpp!=24! \n " ) ;
exit ( 10 ) ;
}
/* Choose the max size even if not WIN32 to avoid memory faults */
/* and to allow the possibility of changing the "type" of the */
/* bitmap without reallocating memory. */
size = bmp_bytewidth_win32 ( bmap ) * bmap - > height ;
if ( bmap - > data ! = NULL & & bmap - > size_allocated > = size )
return ( 1 ) ;
if ( bmap - > data ! = NULL )
willus_mem_realloc_robust_warn ( ( void * * ) & bmap - > data , size ,
bmap - > size_allocated , funcname , 10 ) ;
else
willus_mem_alloc_warn ( ( void * * ) & bmap - > data , size , funcname , 10 ) ;
bmap - > size_allocated = size ;
return ( 1 ) ;
}
static void bmp_free ( WILLUSBITMAP * bmap )
{
if ( bmap - > data ! = NULL )
{
willus_mem_free ( ( double * * ) & bmap - > data , " bmp_free " ) ;
bmap - > data = NULL ;
bmap - > size_allocated = 0 ;
}
}
/*
* * If 8 - bit , the bitmap is filled with < r > .
* * If 24 - bit , it gets < r > , < g > , < b > values .
*/
static void bmp_fill ( WILLUSBITMAP * bmp , int r , int g , int b )
{
int y , n ;
if ( bmp - > bpp = = 8 | | ( r = = g & & r = = b ) )
{
memset ( bmp - > data , r , bmp - > size_allocated ) ;
return ;
}
if ( bmp - > type = = WILLUSBITMAP_TYPE_WIN32 & & bmp - > bpp = = 24 )
{
y = r ;
r = b ;
b = y ;
}
for ( y = bmp - > height - 1 ; y > = 0 ; y - - )
{
unsigned char * p ;
p = bmp_rowptr_from_top ( bmp , y ) ;
for ( n = bmp - > width - 1 ; n > = 0 ; n - - )
{
( * p ) = r ;
p + + ;
( * p ) = g ;
p + + ;
( * p ) = b ;
p + + ;
}
}
}
static int bmp_copy ( WILLUSBITMAP * dest , WILLUSBITMAP * src )
{
dest - > width = src - > width ;
dest - > height = src - > height ;
dest - > bpp = src - > bpp ;
dest - > type = src - > type ;
if ( ! bmp_alloc ( dest ) )
return ( 0 ) ;
memcpy ( dest - > data , src - > data , src - > height * bmp_bytewidth ( src ) ) ;
memcpy ( dest - > red , src - > red , sizeof ( int ) * 256 ) ;
memcpy ( dest - > green , src - > green , sizeof ( int ) * 256 ) ;
memcpy ( dest - > blue , src - > blue , sizeof ( int ) * 256 ) ;
return ( 1 ) ;
}
static int bmp_bytewidth ( WILLUSBITMAP * bmp ) {
return ( bmp - > bpp = = 24 ? bmp - > width * 3 : bmp - > width ) ;
}
/*
* * row = = 0 = = > top row of bitmap
* * row = = bmp - > height - 1 = = > bottom row of bitmap
* * ( regardless of bitmap type )
*/
static unsigned char * bmp_rowptr_from_top ( WILLUSBITMAP * bmp , int row )
{
if ( bmp - > type = = WILLUSBITMAP_TYPE_WIN32 )
return ( & bmp - > data [ bmp_bytewidth ( bmp ) * ( bmp - > height - 1 - row ) ] ) ;
else
return ( & bmp - > data [ bmp_bytewidth ( bmp ) * row ] ) ;
}
/*
* * Allocate more bitmap rows .
* * ratio typically something like 1.5 or 2.0
*/
static void bmp_more_rows ( WILLUSBITMAP * bmp , double ratio , int pixval )
{
int new_height , new_bytes , bw ;
static char * funcname = " bmp_more_rows " ;
new_height = ( int ) ( bmp - > height * ratio + .5 ) ;
if ( new_height < = bmp - > height )
return ;
bw = bmp_bytewidth ( bmp ) ;
new_bytes = bw * new_height ;
if ( new_bytes > bmp - > size_allocated ) {
willus_mem_realloc_robust_warn ( ( void * * ) & bmp - > data , new_bytes ,
bmp - > size_allocated , funcname , 10 ) ;
bmp - > size_allocated = new_bytes ;
}
/* Fill in */
memset ( bmp_rowptr_from_top ( bmp , bmp - > height ) , pixval ,
( new_height - bmp - > height ) * bw ) ;
bmp - > height = new_height ;
}
static double resample_single ( double * y , double x1 , double x2 )
{
int i , i1 , i2 ;
double dx , dx1 , dx2 , sum ;
i1 = floor ( x1 ) ;
i2 = floor ( x2 ) ;
if ( i1 = = i2 )
return ( y [ i1 ] ) ;
dx = x2 - x1 ;
if ( dx > 1. )
dx = 1. ;
dx1 = 1. - ( x1 - i1 ) ;
dx2 = x2 - i2 ;
sum = 0. ;
if ( dx1 > 1e-8 * dx )
sum + = dx1 * y [ i1 ] ;
if ( dx2 > 1e-8 * dx )
sum + = dx2 * y [ i2 ] ;
for ( i = i1 + 1 ; i < = i2 - 1 ; sum + = y [ i ] , i + + ) ;
return ( sum / ( x2 - x1 ) ) ;
}
/*
* * Resample src [ ] into dst [ ] .
* * Examples : resample_1d ( dst , src , 0. , 5. , 5 ) would simply copy the
* * first five elements of src [ ] to dst [ ] .
* *
* * resample_1d ( dst , src , 0. , 5. , 10 ) would work as follows :
* * dst [ 0 ] and dst [ 1 ] would get src [ 0 ] .
* * dst [ 2 ] and dst [ 3 ] would get src [ 1 ] .
* * and so on .
* *
*/
static void resample_1d ( double * dst , double * src , double x1 , double x2 ,
int n )
{
int i ;
double new , last ;
last = x1 ;
for ( i = 0 ; i < n ; i + + )
{
new = x1 + ( x2 - x1 ) * ( i + 1 ) / n ;
dst [ i ] = resample_single ( src , last , new ) ;
last = new ;
}
}
static void bmp_resample_1 ( double * tempbmp , WILLUSBITMAP * src , double x1 , double y1 ,
double x2 , double y2 , int newwidth , int newheight ,
double * temprow , int color )
{
int row , col , x0 , dx , y0 , dy ;
x0 = floor ( x1 ) ;
dx = ceil ( x2 ) - x0 ;
x1 - = x0 ;
x2 - = x0 ;
y0 = floor ( y1 ) ;
dy = ceil ( y2 ) - y0 ;
y1 - = y0 ;
y2 - = y0 ;
if ( src - > type = = WILLUSBITMAP_TYPE_WIN32 & & color > = 0 )
color = 2 - color ;
for ( row = 0 ; row < dy ; row + + )
{
unsigned char * p ;
p = bmp_rowptr_from_top ( src , row + y0 ) ;
if ( src - > bpp = = 8 )
{
switch ( color )
{
case - 1 :
for ( col = 0 , p + = x0 ; col < dx ; col + + , p + + )
temprow [ col ] = p [ 0 ] ;
break ;
case 0 :
for ( col = 0 , p + = x0 ; col < dx ; col + + , p + + )
temprow [ col ] = src - > red [ p [ 0 ] ] ;
break ;
case 1 :
for ( col = 0 , p + = x0 ; col < dx ; col + + , p + + )
temprow [ col ] = src - > green [ p [ 0 ] ] ;
break ;
case 2 :
for ( col = 0 , p + = x0 ; col < dx ; col + + , p + + )
temprow [ col ] = src - > blue [ p [ 0 ] ] ;
break ;
}
}
else
{
p + = color ;
for ( col = 0 , p + = 3 * x0 ; col < dx ; temprow [ col ] = p [ 0 ] , col + + , p + = 3 ) ;
}
resample_1d ( & tempbmp [ row * newwidth ] , temprow , x1 , x2 , newwidth ) ;
}
for ( col = 0 ; col < newwidth ; col + + )
{
double * p , * s ;
p = & tempbmp [ col ] ;
s = & temprow [ dy ] ;
for ( row = 0 ; row < dy ; row + + , p + = newwidth )
temprow [ row ] = p [ 0 ] ;
resample_1d ( s , temprow , y1 , y2 , newheight ) ;
p = & tempbmp [ col ] ;
for ( row = 0 ; row < newheight ; row + + , p + = newwidth , s + + )
p [ 0 ] = s [ 0 ] ;
}
}
/*
* * Resample ( re - size ) bitmap . The pixel positions left to right go from
* * 0.0 to src - > width ( x - coord ) , and top to bottom go from
* * 0.0 to src - > height ( y - coord ) .
* * The cropped rectangle ( x1 , y1 ) to ( x2 , y2 ) is placed into
* * the destination bitmap , which need not be allocated yet .
* *
* * The destination bitmap will be 8 - bit grayscale if the source bitmap
* * passes the bmp_is_grayscale ( ) function . Otherwise it will be 24 - bit .
* *
* * Returns 0 for okay .
* * - 1 for not enough memory .
* * - 2 for bad cropping area or destination bitmap size
*/
static int bmp_resample ( WILLUSBITMAP * dest , WILLUSBITMAP * src , double x1 ,
double y1 , double x2 , double y2 , int newwidth , int newheight )
{
int gray , maxlen , colorplanes ;
double t ;
double * tempbmp ;
double * temprow ;
int color , hmax , row , col , dy ;
static char * funcname = " bmp_resample " ;
/* Clip and sort x1,y1 and x2,y2 */
if ( x1 > src - > width )
x1 = src - > width ;
else if ( x1 < 0. )
x1 = 0. ;
if ( x2 > src - > width )
x2 = src - > width ;
else if ( x2 < 0. )
x2 = 0. ;
if ( y1 > src - > height )
y1 = src - > height ;
else if ( y1 < 0. )
y1 = 0. ;
if ( y2 > src - > height )
y2 = src - > height ;
else if ( y2 < 0. )
y2 = 0. ;
if ( x2 < x1 ) {
t = x2 ;
x2 = x1 ;
x1 = t ;
}
if ( y2 < y1 ) {
t = y2 ;
y2 = y1 ;
y1 = t ;
}
dy = y2 - y1 ;
dy + = 2 ;
if ( x2 - x1 = = 0. | | y2 - y1 = = 0. )
return ( - 2 ) ;
/* Allocate temp storage */
maxlen = x2 - x1 > dy + newheight ? ( int ) ( x2 - x1 ) : dy + newheight ;
maxlen + = 16 ;
hmax = newheight > dy ? newheight : dy ;
if ( ! willus_mem_alloc ( & temprow , maxlen * sizeof ( double ) , funcname ) )
return ( - 1 ) ;
if ( ! willus_mem_alloc ( & tempbmp , hmax * newwidth * sizeof ( double ) ,
funcname ) ) {
willus_mem_free ( & temprow , funcname ) ;
return ( - 1 ) ;
}
if ( ( gray = bmp_is_grayscale ( src ) ) ! = 0 ) {
int i ;
dest - > bpp = 8 ;
for ( i = 0 ; i < 256 ; i + + )
dest - > red [ i ] = dest - > blue [ i ] = dest - > green [ i ] = i ;
} else
dest - > bpp = 24 ;
dest - > width = newwidth ;
dest - > height = newheight ;
dest - > type = WILLUSBITMAP_TYPE_NATIVE ;
if ( ! bmp_alloc ( dest ) ) {
willus_mem_free ( & tempbmp , funcname ) ;
willus_mem_free ( & temprow , funcname ) ;
return ( - 1 ) ;
}
colorplanes = gray ? 1 : 3 ;
for ( color = 0 ; color < colorplanes ; color + + ) {
bmp_resample_1 ( tempbmp , src , x1 , y1 , x2 , y2 , newwidth , newheight ,
temprow , gray ? - 1 : color ) ;
for ( row = 0 ; row < newheight ; row + + ) {
unsigned char * p ;
double * s ;
p = bmp_rowptr_from_top ( dest , row ) + color ;
s = & tempbmp [ row * newwidth ] ;
if ( colorplanes = = 1 )
for ( col = 0 ; col < newwidth ;
p [ 0 ] = ( int ) ( s [ 0 ] + .5 ) , col + + , s + + , p + + )
;
else
for ( col = 0 ; col < newwidth ;
p [ 0 ] = ( int ) ( s [ 0 ] + .5 ) , col + + , s + + , p + = colorplanes )
;
}
}
willus_mem_free ( & tempbmp , funcname ) ;
willus_mem_free ( & temprow , funcname ) ;
return ( 0 ) ;
}
static int bmp8_greylevel_convert ( int r , int g , int b )
{
return ( ( int ) ( ( r * 0.3 + g * 0.59 + b * 0.11 ) * 1.002 ) ) ;
}
/*
* * One of dest or src can be NULL , which is the
* * same as setting them equal to each other , but
* * in this case , the bitmap must be 24 - bit !
*/
static int bmp_is_grayscale ( WILLUSBITMAP * bmp )
{
int i ;
if ( bmp - > bpp ! = 8 )
return ( 0 ) ;
for ( i = 0 ; i < 256 ; i + + )
if ( bmp - > red [ i ] ! = i | | bmp - > green [ i ] ! = i | | bmp - > blue [ i ] ! = i )
return ( 0 ) ;
return ( 1 ) ;
}
static void bmp_color_xform8 ( WILLUSBITMAP * dest , WILLUSBITMAP * src , unsigned char * newval )
{
int i , ir ;
if ( src = = NULL )
src = dest ;
if ( dest = = NULL )
dest = src ;
if ( dest ! = src )
{
dest - > width = src - > width ;
dest - > height = src - > height ;
dest - > bpp = 8 ;
for ( i = 0 ; i < 256 ; i + + )
dest - > red [ i ] = dest - > green [ i ] = dest - > blue [ i ] = i ;
bmp_alloc ( dest ) ;
}
for ( ir = 0 ; ir < src - > height ; ir + + )
{
unsigned char * sp , * dp ;
sp = bmp_rowptr_from_top ( src , ir ) ;
dp = bmp_rowptr_from_top ( dest , ir ) ;
for ( i = 0 ; i < src - > width ; i + + )
dp [ i ] = newval [ sp [ i ] ] ;
}
}
/*
* * One of dest or src can be NULL , which is the
* * same as setting them equal to each other , but
* * in this case , the bitmap must be 24 - bit !
*/
static void bmp_color_xform ( WILLUSBITMAP * dest , WILLUSBITMAP * src , unsigned char * newval )
{
int ir , ic ;
if ( src = = NULL )
src = dest ;
if ( dest = = NULL )
dest = src ;
if ( bmp_is_grayscale ( src ) )
{
bmp_color_xform8 ( dest , src , newval ) ;
return ;
}
if ( dest ! = src )
{
dest - > width = src - > width ;
dest - > height = src - > height ;
dest - > bpp = 24 ;
bmp_alloc ( dest ) ;
}
for ( ir = 0 ; ir < src - > height ; ir + + )
{
unsigned char * sp , * dp ;
sp = bmp_rowptr_from_top ( src , ir ) ;
dp = bmp_rowptr_from_top ( dest , ir ) ;
for ( ic = 0 ; ic < src - > width ; ic + + , dp + = 3 )
{
int r , g , b ;
RGBGETINCPTR ( src , sp , r , g , b ) ;
r = newval [ r ] ;
g = newval [ g ] ;
b = newval [ b ] ;
RGBSET24 ( dest , dp , r , g , b ) ;
}
}
}
/*
* * One of dest or src can be NULL , which is the
* * same as setting them equal to each other , but
* * in this case , the bitmap must be 24 - bit !
* * Note : contrast > 1 will increase the contrast .
* * contrast < 1 will decrease the contrast .
* * contrast of 0 will make all pixels the same value .
* * contrast of 1 will not change the image .
*/
static void bmp_contrast_adjust ( WILLUSBITMAP * dest , WILLUSBITMAP * src , double contrast )
{
int i ;
static unsigned char newval [ 256 ] ;
for ( i = 0 ; i < 256 ; i + + )
{
double x , y ;
int sgn , v ;
x = ( i - 127.5 ) / 127.5 ;
sgn = x < 0 ? - 1 : 1 ;
if ( contrast < 0 )
sgn = - sgn ;
x = fabs ( x ) ;
if ( fabs ( contrast ) > 1.5 )
y = x < .99999 ? 1 - exp ( fabs ( contrast ) * x / ( x - 1 ) ) : 1. ;
else
{
y = fabs ( contrast ) * x ;
if ( y > 1. )
y = 1. ;
}
y = 127.5 + y * sgn * 127.5 ;
v = ( int ) ( y + .5 ) ;
if ( v < 0 )
v = 0 ;
if ( v > 255 )
v = 255 ;
newval [ i ] = v ;
}
bmp_color_xform ( dest , src , newval ) ;
}
/*
* * Convert bitmap to grey - scale in - situ
*/
static void bmp_convert_to_greyscale_ex ( WILLUSBITMAP * dst , WILLUSBITMAP * src )
{
int oldbpr , newbpr , bpp , dp , rownum , colnum , i ;
oldbpr = bmp_bytewidth ( src ) ;
dp = src - > bpp = = 8 ? 1 : 3 ;
bpp = src - > bpp ;
dst - > bpp = 8 ;
for ( i = 0 ; i < 256 ; i + + )
dst - > red [ i ] = dst - > green [ i ] = dst - > blue [ i ] = i ;
if ( dst ! = src ) {
dst - > width = src - > width ;
dst - > height = src - > height ;
bmp_alloc ( dst ) ;
}
newbpr = bmp_bytewidth ( dst ) ;
/* Possibly restore src->bpp to 24 so RGBGET works right (src & dst may be the same) */
src - > bpp = bpp ;
for ( rownum = 0 ; rownum < src - > height ; rownum + + ) {
unsigned char * oldp , * newp ;
oldp = & src - > data [ oldbpr * rownum ] ;
newp = & dst - > data [ newbpr * rownum ] ;
for ( colnum = 0 ; colnum < src - > width ; colnum + + , oldp + = dp , newp + + ) {
int r , g , b ;
RGBGET ( src , oldp , r , g , b ) ;
( * newp ) = bmp8_greylevel_convert ( r , g , b ) ;
}
}
dst - > bpp = 8 ; /* Possibly restore dst->bpp to 8 since src & dst may be the same. */
}
2012-10-27 08:03:49 +00:00
/*
* * Bitmap is assumed to be grayscale
*/
static double bmp_row_by_row_stdev ( WILLUSBITMAP * bmp , int ccount ,
int whitethresh , double theta_radians )
{
int dc1 , dc2 , c1 , c2 ;
int r , n , nn , dw ;
double tanth , csum , csumsq , stdev ;
c1 = bmp - > width / 15. ;
c2 = bmp - > width - c1 ;
dw = ( int ) ( ( c2 - c1 ) / ccount + .5 ) ;
if ( dw < 1 )
dw = 1 ;
tanth = - tan ( theta_radians ) ;
dc1 = ( int ) ( tanth * bmp - > width ) ;
if ( dc1 < 0 ) {
dc1 = 1 - dc1 ;
dc2 = 0 ;
} else {
dc2 = - dc1 - 1 ;
dc1 = 0 ;
}
dc1 + = bmp - > height / 15. ;
dc2 - = bmp - > height / 15. ;
csum = csumsq = 0. ;
n = 0 ;
for ( r = dc1 + 1 ; r < bmp - > height + dc2 - 1 ; r + + ) {
int c , count , r0last ;
double dcount ;
unsigned char * p ;
r0last = 0 ;
p = bmp_rowptr_from_top ( bmp , r0last ) ;
for ( nn = count = 0 , c = c1 ; c < c2 ; c + = dw ) {
int r0 ;
r0 = r + tanth * c ;
if ( r0 < 0 | | r0 > = bmp - > height )
continue ;
if ( r0 ! = r0last ) {
r0last = r0 ;
p = bmp_rowptr_from_top ( bmp , r0last ) ;
}
nn + + ;
if ( p [ c ] < whitethresh )
count + + ;
}
dcount = 100. * count / nn ;
csum + = dcount ;
csumsq + = dcount * dcount ;
n + + ;
}
stdev = sqrt ( fabs ( ( csum / n ) * ( csum / n ) - csumsq / n ) ) ;
return ( stdev ) ;
}
/*
* * y0 = 0 = = > bottom row !
*/
static void bmp_pix_vali ( WILLUSBITMAP * bmp , int x0 , int y0 , int * r , int * g , int * b )
{
unsigned char * p ;
int rr , gg , bb ;
p = bmp_rowptr_from_top ( bmp , bmp - > height - 1 - y0 ) ;
p = & p [ x0 * ( bmp - > bpp > > 3 ) ] ;
RGBGET ( bmp , p , rr , gg , bb ) ;
( * r ) = rr ;
( * g ) = gg ;
( * b ) = bb ;
}
/*
* * y0 = 0 = = > bottom row !
*/
static int bmp_grey_pix_vali ( WILLUSBITMAP * bmp , int x0 , int y0 )
{
unsigned char * p ;
int r , g , b ;
p = bmp_rowptr_from_top ( bmp , bmp - > height - 1 - y0 ) ;
p = & p [ x0 * ( bmp - > bpp > > 3 ) ] ;
RGBGET ( bmp , p , r , g , b ) ;
return ( bmp8_greylevel_convert ( r , g , b ) ) ;
}
/*
* * Return pix value ( 0.0 - 255.0 ) in double precision given
* * a double precision position . Bitmap is assumed to be 8 - bit greyscale .
* *
* * x0 , y0 are from bottom corner .
* * x0 = 0.5 , y0 = 0.5 would give exactly the value of the pixel
* * in the lower left corner of the bitmap .
*/
double bmp_grey_pix_vald ( WILLUSBITMAP * bmp , double x0 , double y0 )
{
int ix0 , iy0 , ix1 , iy1 ;
double fx0 , fx1 , fy0 , fy1 ;
ix0 = ( int ) ( x0 - .5 ) ;
ix1 = ix0 + 1 ;
iy0 = ( int ) ( y0 - .5 ) ;
iy1 = iy0 + 1 ;
BOUND ( ix0 , 0 , bmp - > width - 1 ) ;
BOUND ( ix1 , 0 , bmp - > width - 1 ) ;
BOUND ( iy0 , 0 , bmp - > height - 1 ) ;
BOUND ( iy1 , 0 , bmp - > height - 1 ) ;
fx0 = 1. - fabs ( ix0 + 0.5 - x0 ) ;
if ( fx0 < 0. )
fx0 = 0. ;
fx1 = 1. - fabs ( ix1 + 0.5 - x0 ) ;
if ( fx1 < 0. )
fx1 = 0. ;
fy0 = 1. - fabs ( iy0 + 0.5 - y0 ) ;
if ( fy0 < 0. )
fy0 = 0. ;
fy1 = 1. - fabs ( iy1 + 0.5 - y0 ) ;
if ( fy1 < 0. )
fy1 = 0. ;
if ( ( fx0 = = 0. & & fx1 = = 0. ) | | ( fy0 = = 0. & & fy1 = = 0. ) )
return ( - 1. ) ;
return ( ( fy0
* ( fx0 * bmp_grey_pix_vali ( bmp , ix0 , iy0 )
+ fx1 * bmp_grey_pix_vali ( bmp , ix1 , iy0 ) )
+ fy1
* ( fx0 * bmp_grey_pix_vali ( bmp , ix0 , iy1 )
+ fx1 * bmp_grey_pix_vali ( bmp , ix1 , iy1 ) ) )
/ ( ( fx0 + fx1 ) * ( fy0 + fy1 ) ) ) ;
}
/*
* * Return pix values ( 0.0 - 255.0 ) in double precision given
* * a double precision position .
* *
* * x0 , y0 are from BOTTOM CORNER .
* * x0 = 0.5 , y0 = 0.5 would give exactly the value of the pixel
* * in the lower left corner of the bitmap .
*/
static void bmp_pix_vald ( WILLUSBITMAP * bmp , double x0 , double y0 , double * r , double * g ,
double * b )
{
int ix0 , iy0 , ix1 , iy1 ;
double fx0 , fx1 , fy0 , fy1 ;
int r00 , r10 , r01 , r11 ;
int g00 , g10 , g01 , g11 ;
int b00 , b10 , b01 , b11 ;
ix0 = ( int ) ( x0 - .5 ) ;
ix1 = ix0 + 1 ;
iy0 = ( int ) ( y0 - .5 ) ;
iy1 = iy0 + 1 ;
BOUND ( ix0 , 0 , bmp - > width - 1 ) ;
BOUND ( ix1 , 0 , bmp - > width - 1 ) ;
BOUND ( iy0 , 0 , bmp - > height - 1 ) ;
BOUND ( iy1 , 0 , bmp - > height - 1 ) ;
fx0 = 1. - fabs ( ix0 + 0.5 - x0 ) ;
if ( fx0 < 0. )
fx0 = 0. ;
fx1 = 1. - fabs ( ix1 + 0.5 - x0 ) ;
if ( fx1 < 0. )
fx1 = 0. ;
fy0 = 1. - fabs ( iy0 + 0.5 - y0 ) ;
if ( fy0 < 0. )
fy0 = 0. ;
fy1 = 1. - fabs ( iy1 + 0.5 - y0 ) ;
if ( fy1 < 0. )
fy1 = 0. ;
if ( ( fx0 = = 0. & & fx1 = = 0. ) | | ( fy0 = = 0. & & fy1 = = 0. ) ) {
( * r ) = ( * g ) = ( * b ) = - 1. ;
return ;
}
bmp_pix_vali ( bmp , ix0 , iy0 , & r00 , & g00 , & b00 ) ;
bmp_pix_vali ( bmp , ix1 , iy0 , & r10 , & g10 , & b10 ) ;
bmp_pix_vali ( bmp , ix0 , iy1 , & r01 , & g01 , & b01 ) ;
bmp_pix_vali ( bmp , ix1 , iy1 , & r11 , & g11 , & b11 ) ;
( * r ) = ( ( fy0 * ( fx0 * r00 + fx1 * r10 ) + fy1 * ( fx0 * r01 + fx1 * r11 ) )
/ ( ( fx0 + fx1 ) * ( fy0 + fy1 ) ) ) ;
( * g ) = ( ( fy0 * ( fx0 * g00 + fx1 * g10 ) + fy1 * ( fx0 * g01 + fx1 * g11 ) )
/ ( ( fx0 + fx1 ) * ( fy0 + fy1 ) ) ) ;
( * b ) = ( ( fy0 * ( fx0 * b00 + fx1 * b10 ) + fy1 * ( fx0 * b01 + fx1 * b11 ) )
/ ( ( fx0 + fx1 ) * ( fy0 + fy1 ) ) ) ;
}
static void bmp_rotate_fast ( WILLUSBITMAP * bmp , double degrees , int expand )
{
WILLUSBITMAP _dst , * dst ;
double th , sth , cth ;
int i , r , g , b , w , h , row , col ;
dst = & _dst ;
th = degrees * PI / 180. ;
sth = sin ( th ) ;
cth = cos ( th ) ;
if ( expand ) {
w = ( int ) ( fabs ( bmp - > width * cth ) + fabs ( bmp - > height * sth ) + .5 ) ;
h = ( int ) ( fabs ( bmp - > height * cth ) + fabs ( bmp - > width * sth ) + .5 ) ;
} else {
w = bmp - > width ;
h = bmp - > height ;
}
dst = & _dst ;
bmp_init ( dst ) ;
dst - > width = w ;
dst - > height = h ;
dst - > bpp = bmp - > bpp ;
if ( dst - > bpp = = 8 )
for ( i = 0 ; i < = 255 ; i + + )
dst - > red [ i ] = dst - > green [ i ] = dst - > blue [ i ] = i ;
bmp_alloc ( dst ) ;
bmp_pix_vali ( bmp , 0 , 0 , & r , & g , & b ) ;
bmp_fill ( dst , r , g , b ) ;
if ( dst - > bpp = = 8 )
for ( row = 0 ; row < dst - > height ; row + + ) {
unsigned char * p ;
double x1 , y1 , x2 , y2 ;
y2 = dst - > height / 2. - row ;
p = bmp_rowptr_from_top ( dst , row ) ;
for ( x2 = - dst - > width / 2. , col = 0 ; col < dst - > width ;
col + + , p + + , x2 + = 1.0 ) {
double g ;
x1 = - .5 + bmp - > width / 2. + x2 * cth + y2 * sth ;
y1 = - .5 + bmp - > height / 2. + y2 * cth - x2 * sth ;
if ( x1 < 0. | | x1 > = bmp - > width | | y1 < 0. | | y1 > = bmp - > height )
continue ;
g = bmp_grey_pix_vald ( bmp , x1 , y1 ) ;
if ( g > = 0. )
p [ 0 ] = g ;
}
}
else
for ( row = 0 ; row < dst - > height ; row + + ) {
unsigned char * p ;
double x1 , y1 , x2 , y2 ;
y2 = dst - > height / 2. - row ;
p = bmp_rowptr_from_top ( dst , row ) ;
for ( x2 = - dst - > width / 2. , col = 0 ; col < dst - > width ; col + + , p + =
3 , x2 + = 1.0 ) {
double rr , gg , bb ;
x1 = - .5 + bmp - > width / 2. + x2 * cth + y2 * sth ;
y1 = - .5 + bmp - > height / 2. + y2 * cth - x2 * sth ;
if ( x1 < 0. | | x1 > = bmp - > width | | y1 < 0. | | y1 > = bmp - > height )
continue ;
bmp_pix_vald ( bmp , x1 , y1 , & rr , & gg , & bb ) ;
if ( rr < 0. )
continue ;
p [ 0 ] = rr ;
p [ 1 ] = gg ;
p [ 2 ] = bb ;
}
}
bmp_copy ( bmp , dst ) ;
bmp_free ( dst ) ;
}
static double bmp_autostraighten ( WILLUSBITMAP * src , WILLUSBITMAP * srcgrey , int white ,
double maxdegrees , double mindegrees , int debug )
{
int i , na , n , imax ;
double stepsize , sdmin , sdmax , rotdeg ;
double * sdev ;
static int rpc = 0 ;
static char * funcname = " bmp_autostraighten " ;
rpc + + ;
stepsize = .5 ;
na = ( int ) ( maxdegrees / stepsize + .5 ) ;
if ( na < 1 )
na = 1 ;
n = 1 + na * 2 ;
sdmin = 999. ;
sdmax = - 999. ;
imax = 0 ;
willus_mem_alloc_warn ( ( void * * ) & sdev , n * sizeof ( double ) , funcname , 10 ) ;
for ( i = 0 ; i < n ; i + + ) {
double theta , sdev0 ;
theta = ( i - na ) * stepsize * PI / 180. ;
sdev0 = bmp_row_by_row_stdev ( srcgrey , 400 , white , theta ) ;
if ( sdmin > sdev0 )
sdmin = sdev0 ;
if ( sdmax < sdev0 ) {
imax = i ;
sdmax = sdev0 ;
}
sdev [ i ] = sdev0 ;
}
if ( sdmax < = 0. ) {
willus_mem_free ( ( double * * ) & sdev , funcname ) ;
return ( 0. ) ;
}
for ( i = 0 ; i < n ; i + + )
sdev [ i ] / = sdmax ;
sdmin / = sdmax ;
rotdeg = - ( imax - na ) * stepsize ;
if ( sdmin > 0.95 | | fabs ( rotdeg ) < = mindegrees
| | fabs ( fabs ( rotdeg ) - fabs ( maxdegrees ) ) < 0.25 ) {
willus_mem_free ( ( double * * ) & sdev , funcname ) ;
return ( 0. ) ;
}
if ( imax > = 3 & & imax < = n - 4 ) {
double sd1min , sd2min , sdthresh ;
for ( sd1min = sdev [ imax - 1 ] , i = imax - 2 ; i > = 0 ; i - - )
if ( sd1min > sdev [ i ] )
sd1min = sdev [ i ] ;
for ( sd2min = sdev [ imax + 1 ] , i = imax + 2 ; i < n ; i + + )
if ( sd2min > sdev [ i ] )
sd2min = sdev [ i ] ;
sdthresh = sd1min > sd2min ? sd1min * 1.01 : sd2min * 1.01 ;
if ( sdthresh < 0.9 )
sdthresh = 0.9 ;
if ( sdthresh < 0.95 ) {
double deg1 , deg2 ;
for ( i = imax - 1 ; i > = 0 ; i - - )
if ( sdev [ i ] < sdthresh )
break ;
deg1 =
stepsize
* ( ( i - na )
+ ( sdthresh - sdev [ i ] )
/ ( sdev [ i + 1 ] - sdev [ i ] ) ) ;
for ( i = imax + 1 ; i < n - 1 ; i + + )
if ( sdev [ i ] < sdthresh )
break ;
deg2 =
stepsize
* ( ( i - na )
- ( sdthresh - sdev [ i ] )
/ ( sdev [ i - 1 ] - sdev [ i ] ) ) ;
if ( deg2 - deg1 < 2.5 ) {
rotdeg = - ( deg1 + deg2 ) / 2. ;
if ( debug )
printf ( " /sa l \" srcpage %d, %.1f%% line \" 2 \n /sa m 2 2 \n "
" %g 0 \n %g 1 \n //nc \n "
" /sa l \" srcpage %d, %.1f%% line \" 2 \n /sa m 2 2 \n "
" %g 0 \n %g 1 \n //nc \n " , rpc , sdthresh * 100. ,
deg1 , deg1 , rpc , sdthresh * 100. , deg2 , deg2 ) ;
}
}
}
printf ( " \n (Straightening page: rotating cc by %.2f deg.) \n " , rotdeg ) ;
/* BMP rotation fills with pixel value at (0,0) */
srcgrey - > data [ 0 ] = 255 ;
bmp_rotate_fast ( srcgrey , rotdeg , 0 ) ;
if ( src ! = NULL ) {
src - > data [ 0 ] = src - > data [ 1 ] = src - > data [ 2 ] = 255 ;
bmp_rotate_fast ( src , rotdeg , 0 ) ;
}
willus_mem_free ( ( double * * ) & sdev , funcname ) ;
return ( rotdeg ) ;
}
2012-10-27 14:38:53 +00:00
static void bmp_flip_horizontal ( WILLUSBITMAP * bmp )
{
int i , j , bpp ;
bpp = bmp - > bpp / 8 ;
for ( i = 0 ; i < bmp - > height ; i + + ) {
unsigned char * p , * p2 ;
p = bmp_rowptr_from_top ( bmp , i ) ;
p2 = & p [ ( bmp - > width - 1 ) * bpp ] ;
for ( ; p < p2 ; p + = bpp , p2 - = bpp )
for ( j = 0 ; j < bpp ; j + + ) {
unsigned char t ;
t = p [ j ] ;
p [ j ] = p2 [ j ] ;
p2 [ j ] = t ;
}
}
}
static void bmp_flip_vertical ( WILLUSBITMAP * bmp )
{
int i , bw , n ;
bw = bmp_bytewidth ( bmp ) ;
n = bmp - > height / 2 ;
for ( i = 0 ; i < n ; i + + ) {
unsigned char * p , * p2 ;
int j ;
p = bmp_rowptr_from_top ( bmp , i ) ;
p2 = bmp_rowptr_from_top ( bmp , bmp - > height - i - 1 ) ;
for ( j = bw ; j > 0 ; j - - , p + + , p2 + + ) {
unsigned char t ;
t = p [ 0 ] ;
p [ 0 ] = p2 [ 0 ] ;
p2 [ 0 ] = t ;
}
}
}
static int bmp_rotate_90 ( WILLUSBITMAP * bmp )
{
WILLUSBITMAP * sbmp , _sbmp ;
int bpp , dbw , sr ;
sbmp = & _sbmp ;
bmp_init ( sbmp ) ;
if ( ! bmp_copy ( sbmp , bmp ) )
return ( 0 ) ;
bmp - > width = sbmp - > height ;
bmp - > height = sbmp - > width ;
bpp = bmp - > bpp / 8 ;
if ( ! bmp_alloc ( bmp ) ) {
bmp_free ( sbmp ) ;
return ( 0 ) ;
}
dbw = ( int ) ( bmp_rowptr_from_top ( bmp , 1 ) - bmp_rowptr_from_top ( bmp , 0 ) ) ;
for ( sr = 0 ; sr < sbmp - > height ; sr + + ) {
unsigned char * sp , * dp ;
int j , sc ;
sp = bmp_rowptr_from_top ( sbmp , sr ) ;
dp = bmp_rowptr_from_top ( bmp , bmp - > height - 1 ) + bpp * sr ;
for ( sc = sbmp - > width ; sc > 0 ; sc - - , dp - = dbw )
for ( j = 0 ; j < bpp ; j + + , sp + + )
dp [ j ] = sp [ 0 ] ;
}
bmp_free ( sbmp ) ;
return ( 1 ) ;
}
static int bmp_rotate_270 ( WILLUSBITMAP * bmp )
{
WILLUSBITMAP * sbmp , _sbmp ;
int bpp , dbw , sr ;
sbmp = & _sbmp ;
bmp_init ( sbmp ) ;
if ( ! bmp_copy ( sbmp , bmp ) )
return ( 0 ) ;
bmp - > width = sbmp - > height ;
bmp - > height = sbmp - > width ;
bpp = bmp - > bpp / 8 ;
if ( ! bmp_alloc ( bmp ) ) {
bmp_free ( sbmp ) ;
return ( 0 ) ;
}
dbw = ( int ) ( bmp_rowptr_from_top ( bmp , 1 ) - bmp_rowptr_from_top ( bmp , 0 ) ) ;
for ( sr = 0 ; sr < sbmp - > height ; sr + + ) {
unsigned char * sp , * dp ;
int j , sc ;
sp = bmp_rowptr_from_top ( sbmp , sr ) ;
dp = bmp_rowptr_from_top ( bmp , 0 ) + bpp * ( sbmp - > height - 1 - sr ) ;
for ( sc = sbmp - > width ; sc > 0 ; sc - - , dp + = dbw )
for ( j = 0 ; j < bpp ; j + + , sp + + )
dp [ j ] = sp [ 0 ] ;
}
bmp_free ( sbmp ) ;
return ( 1 ) ;
}
/*
* * 1 = okay , 0 = fail
*/
static int bmp_rotate_right_angle ( WILLUSBITMAP * bmp , int degrees )
{
int d ;
d = degrees % 360 ;
if ( d < 0 )
d + = 360 ;
d = ( d + 45 ) / 90 ;
if ( d = = 1 )
return ( bmp_rotate_90 ( bmp ) ) ;
if ( d = = 2 ) {
bmp_flip_horizontal ( bmp ) ;
bmp_flip_vertical ( bmp ) ;
return ( 1 ) ;
}
if ( d = = 3 )
return ( bmp_rotate_270 ( bmp ) ) ;
return ( 1 ) ;
}
2012-10-16 02:02:38 +00:00
/* bmpmupdf.c */
static int bmpmupdf_pixmap_to_bmp ( WILLUSBITMAP * bmp , fz_context * ctx ,
fz_pixmap * pixmap )
{
unsigned char * p ;
int ncomp , i , row , col ;
bmp - > width = fz_pixmap_width ( ctx , pixmap ) ;
bmp - > height = fz_pixmap_height ( ctx , pixmap ) ;
ncomp = fz_pixmap_components ( ctx , pixmap ) ;
/* Has to be 8-bit or RGB */
if ( ncomp ! = 2 & & ncomp ! = 4 )
return ( - 1 ) ;
bmp - > bpp = ( ncomp = = 2 ) ? 8 : 24 ;
bmp_alloc ( bmp ) ;
if ( ncomp = = 2 )
for ( i = 0 ; i < 256 ; i + + )
bmp - > red [ i ] = bmp - > green [ i ] = bmp - > blue [ i ] = i ;
p = fz_pixmap_samples ( ctx , pixmap ) ;
if ( ncomp = = 1 )
for ( row = 0 ; row < bmp - > height ; row + + ) {
unsigned char * dest ;
dest = bmp_rowptr_from_top ( bmp , row ) ;
memcpy ( dest , p , bmp - > width ) ;
p + = bmp - > width ;
}
else if ( ncomp = = 2 )
for ( row = 0 ; row < bmp - > height ; row + + ) {
unsigned char * dest ;
dest = bmp_rowptr_from_top ( bmp , row ) ;
for ( col = 0 ; col < bmp - > width ; col + + , dest + + , p + = 2 )
dest [ 0 ] = p [ 0 ] ;
}
else
for ( row = 0 ; row < bmp - > height ; row + + ) {
unsigned char * dest ;
dest = bmp_rowptr_from_top ( bmp , row ) ;
for ( col = 0 ; col < bmp - > width ;
col + + , dest + = ncomp - 1 , p + = ncomp )
memcpy ( dest , p , ncomp - 1 ) ;
}
return ( 0 ) ;
}
2012-10-18 17:20:48 +00:00
static void handle ( int wait , ddjvu_context_t * ctx )
{
const ddjvu_message_t * msg ;
if ( ! ctx )
return ;
if ( wait )
msg = ddjvu_message_wait ( ctx ) ;
while ( ( msg = ddjvu_message_peek ( ctx ) ) )
{
switch ( msg - > m_any . tag )
{
case DDJVU_ERROR :
fprintf ( stderr , " ddjvu: %s \n " , msg - > m_error . message ) ;
if ( msg - > m_error . filename )
fprintf ( stderr , " ddjvu: '%s:%d' \n " ,
msg - > m_error . filename , msg - > m_error . lineno ) ;
exit ( 10 ) ;
default :
break ;
}
}
ddjvu_message_pop ( ctx ) ;
}