/* ** k2pdfopt.c K2pdfopt optimizes PDF/DJVU files for mobile e-readers ** (e.g. the Kindle) and smartphones. It works well on ** multi-column PDF/DJVU files. K2pdfopt is freeware. ** ** Copyright (C) 2012 http://willus.com ** ** This program is free software: you can redistribute it and/or modify ** it under the terms of the GNU Affero General Public License as ** published by the Free Software Foundation, either version 3 of the ** License, or (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU Affero General Public License for more details. ** ** You should have received a copy of the GNU Affero General Public License ** along with this program. If not, see . ** /* ** WILLUSDEBUGX flags: ** 1 = Generic ** 2 = breakinfo row analysis ** 4 = word wrapping ** 8 = word wrapping II ** 16 = hyphens ** 32 = OCR ** */ // #define WILLUSDEBUGX 32 // #define WILLUSDEBUG #include #include #include #include #include #include #include "k2pdfopt.h" #define HAVE_MUPDF #define VERSION "v1.51" #define GRAYLEVEL(r,g,b) ((int)(((r)*0.3+(g)*0.59+(b)*0.11)*1.002)) #if (defined(WIN32) || defined(WIN64)) #define TTEXT_BOLD ANSI_WHITE #define TTEXT_NORMAL ANSI_NORMAL #define TTEXT_BOLD2 ANSI_YELLOW #define TTEXT_INPUT ANSI_GREEN #define TTEXT_WARN ANSI_RED #define TTEXT_HEADER ANSI_CYAN #define TTEXT_MAGENTA ANSI_MAGENTA #else #define TTEXT_BOLD "\x1b[0m\x1b[34m" #define TTEXT_NORMAL "\x1b[0m" #define TTEXT_BOLD2 "\x1b[0m\x1b[33m" #define TTEXT_INPUT "\x1b[0m\x1b[32m" #define TTEXT_WARN "\x1b[0m\x1b[31m" #define TTEXT_HEADER "\x1b[0m\x1b[36m" #define TTEXT_MAGENTA "\x1b[0m\x1b[35m" #endif #ifndef __ANSI_H__ #define ANSI_RED "\x1b[1m\x1b[31m" #define ANSI_GREEN "\x1b[1m\x1b[32m" #define ANSI_YELLOW "\x1b[1m\x1b[33m" #define ANSI_BROWN "\x1b[0m\x1b[33m" #define ANSI_BLUE "\x1b[1m\x1b[34m" #define ANSI_MAGENTA "\x1b[1m\x1b[35m" #define ANSI_CYAN "\x1b[1m\x1b[36m" #define ANSI_WHITE "\x1b[1m\x1b[37m" #define ANSI_NORMAL "\x1b[0m\x1b[37m" #define ANSI_SAVE_CURSOR "\x1b[s" #define ANSI_RESTORE_CURSOR "\x1b[u" #define ANSI_CLEAR_TO_END "\x1b[K" #define ANSI_BEGIN_LINE "\x1b[80D" #define ANSI_UP_ONE_LINE "\x1b[1A" #define ANSI_HOME "\x1b[2J\x1b[0;0;H" #define __ANSI_H__ #endif /* bmp.c */ #define WILLUSBITMAP_TYPE_NATIVE 0 #define WILLUSBITMAP_TYPE_WIN32 1 #ifdef PI #undef PI #endif /* ** Constants from the front of the CRC standard math tables ** (Accuracy = 50 digits) */ #define PI 3.14159265358979323846264338327950288419716939937511 #define SQRT2 1.41421356237309504880168872420969807856967187537695 #define SQRT3 1.73205080756887729352744634150587236694280525381039 #define LOG10E 0.43429448190325182765112891891660508229439700580367 #define DBPERNEP (20.*LOG10E) #define SRC_TYPE_PDF 1 #define SRC_TYPE_DJVU 2 #define SRC_TYPE_OTHER 3 /* DATA STRUCTURES */ typedef struct { int page; /* Source page */ double rot_deg; /* Source rotation (happens first) */ double x0, y0; /* x0,y0, in points, of lower left point on rectangle */ double w, h; /* width and height of rectangle in points */ double scale; /* Scale rectangle by this factor on destination page */ double x1, y1; /* (x,y) position of lower left point on destination page, in points */ } PDFBOX; typedef struct { PDFBOX *box; int n; int na; } PDFBOXES; typedef struct { int pageno; /* Source page number */ double page_rot_deg; /* Source page rotation */ PDFBOXES boxes; } PAGEINFO; typedef struct { int ch; /* Hyphen starting point -- < 0 for no hyphen */ int c2; /* End of end region if hyphen is erased */ int r1; /* Top of hyphen */ int r2; /* Bottom of hyphen */ } HYPHENINFO; typedef struct { int c1, c2; /* Left and right columns */ int r1, r2; /* Top and bottom of region in pixels */ int rowbase; /* Baseline of row */ int gap; /* Gap to next region in pixels */ int rowheight; /* text + gap */ int capheight; int h5050; int lcheight; HYPHENINFO hyphen; } TEXTROW; typedef struct { TEXTROW *textrow; int rhmean_pixels; /* Mean row height (text) */ int centered; /* Is this set of rows centered? */ int n, na; } BREAKINFO; typedef struct { int red[256]; int green[256]; int blue[256]; unsigned char *data; /* Top to bottom in native type, bottom to */ /* top in Win32 type. */ int width; /* Width of image in pixels */ int height; /* Height of image in pixels */ int bpp; /* Bits per pixel (only 8 or 24 allowed) */ int size_allocated; int type; /* See defines above for WILLUSBITMAP_TYPE_... */ } WILLUSBITMAP; typedef struct { int r1, r2; /* row position from top of bmp, inclusive */ int c1, c2; /* column positions, inclusive */ int rowbase; /* Baseline of text row */ int capheight; /* capital letter height */ int h5050; int lcheight; /* lower-case letter height */ int bgcolor; /* 0 - 255 */ HYPHENINFO hyphen; WILLUSBITMAP *bmp; WILLUSBITMAP *bmp8; WILLUSBITMAP *marked; } BMPREGION; typedef struct { WILLUSBITMAP bmp; int rows; int published_pages; int bgcolor; int fit_to_page; int wordcount; char debugfolder[256]; } MASTERINFO; static int verbose = 0; static int debug = 0; #define DEFAULT_WIDTH 600 #define DEFAULT_HEIGHT 800 #define MIN_REGION_WIDTH_INCHES 1.0 #define SRCROT_AUTO -999. #define SRCROT_AUTOEP -998. /* ** Blank Area Threshold Widths--average black pixel width, in inches, that ** prevents a region from being determined as "blank" or clear. */ static double gtc_in = .005; // detecting gap between columns static double gtr_in = .006; // detecting gap between rows static double gtw_in = .0015; // detecting gap between words // static double gtm_in=.005; // detecting margins for trimming static int src_left_to_right = 1; static int src_whitethresh = -1; static int dst_dpi = 167; static int fit_columns = 1; static int src_dpi = 300; static int dst_width = DEFAULT_WIDTH; /* Full device width in pixels */ static int dst_height = DEFAULT_HEIGHT; static int dst_userwidth = DEFAULT_WIDTH; static int dst_userheight = DEFAULT_HEIGHT; static int dst_justify = -1; // 0 = left, 1 = center static int dst_figure_justify = -1; // -1 = same as dst_justify. 0=left 1=center 2=right static double dst_min_figure_height_in = 0.75; static int dst_fulljustify = -1; // 0 = no, 1 = yes static int dst_color = 0; static int dst_landscape = 0; static double dst_mar = 0.06; static double dst_martop = -1.0; static double dst_marbot = -1.0; static double dst_marleft = -1.0; static double dst_marright = -1.0; static double min_column_gap_inches = 0.1; static double max_column_gap_inches = 1.5; // max gap between columns static double min_column_height_inches = 1.5; static double mar_top = -1.0; static double mar_bot = -1.0; static double mar_left = -1.0; static double mar_right = -1.0; static double max_region_width_inches = 3.6; /* Max viewable width (device width minus margins) */ static int max_columns = 2; static double column_gap_range = 0.33; static double column_offset_max = 0.2; static double column_row_gap_height_in = 1. / 72.; static int text_wrap = 1; static double word_spacing = 0.375; static double display_width_inches = 3.6; /* Device width = dst_width / dst_dpi */ static int column_fitted = 0; static double lm_org, bm_org, tm_org, rm_org, dpi_org; static double contrast_max = 2.0; static int show_marked_source = 0; static double defect_size_pts = 1.0; static double max_vertical_gap_inches = 0.25; static double vertical_multiplier = 1.0; static double vertical_line_spacing = -1.2; static double vertical_break_threshold = 1.75; static int erase_vertical_lines = 0; static int k2_hyphen_detect = 1; static int dst_fit_to_page = 0; /* ** Undocumented cmd-line args */ static double no_wrap_ar_limit = 0.2; /* -arlim */ static double no_wrap_height_limit_inches = 0.55; /* -whmax */ static double little_piece_threshold_inches = 0.5; /* -rwmin */ /* ** Keeping track of vertical gaps */ static double last_scale_factor_internal = -1.0; /* indicates desired vert. gap before next region is added. */ static int last_rowbase_internal; /* Pixels between last text row baseline and current end */ /* of destination bitmap. */ static int beginning_gap_internal = -1; static int last_h5050_internal = -1; static int just_flushed_internal = 0; static int gap_override_internal; /* If > 0, apply this gap in wrapbmp_flush() and then reset. */ void adjust_params_init(void); void set_region_widths(void); static void mark_source_page(BMPREGION *region, int caller_id, int mark_flags); static void fit_column_to_screen(double column_width_inches); static void restore_output_dpi(void); void adjust_contrast(WILLUSBITMAP *src, WILLUSBITMAP *srcgrey, int *white); static int bmpregion_row_black_count(BMPREGION *region, int r0); static void bmpregion_row_histogram(BMPREGION *region); static int bmpregion_find_multicolumn_divider(BMPREGION *region, int *row_black_count, BMPREGION *pageregion, int *npr, int *colcount, int *rowcount); static int bmpregion_column_height_and_gap_test(BMPREGION *column, BMPREGION *region, int r1, int r2, int cmid, int *colcount, int *rowcount); static int bmpregion_is_clear(BMPREGION *region, int *row_is_clear, double gt_in); void bmpregion_multicolumn_add(BMPREGION *region, MASTERINFO *masterinfo, int level, PAGEINFO *pageinfo, int colgap0_pixels); static void bmpregion_vertically_break(BMPREGION *region, MASTERINFO *masterinfo, int allow_text_wrapping, double force_scale, int *colcount, int *rowcount, PAGEINFO *pageinfo, int colgap_pixels, int ncols); static void bmpregion_add(BMPREGION *region, BREAKINFO *breakinfo, MASTERINFO *masterinfo, int allow_text_wrapping, int trim_flags, int allow_vertical_breaks, double force_scale, int justify_flags, int caller_id, int *colcount, int *rowcount, PAGEINFO *pageinfo, int mark_flags, int rowbase_delta); static void dst_add_gap_src_pixels(char *caller, MASTERINFO *masterinfo, int pixels); static void dst_add_gap(MASTERINFO *masterinfo, double inches); static void bmp_src_to_dst(MASTERINFO *masterinfo, WILLUSBITMAP *src, int justification_flags, int whitethresh, int nocr, int dpi); static void bmp_fully_justify(WILLUSBITMAP *jbmp, WILLUSBITMAP *src, int nocr, int whitethresh, int just); #ifdef HAVE_OCR static void ocrwords_fill_in(OCRWORDS *words,WILLUSBITMAP *src,int whitethresh,int dpi); #endif static void bmpregion_trim_margins(BMPREGION *region, int *colcount0, int *rowcount0, int flags); static void bmpregion_hyphen_detect(BMPREGION *region); #if (WILLUSDEBUGX & 6) static void breakinfo_echo(BREAKINFO *bi); #endif #if (defined(WILLUSDEBUGX) || defined(WILLUSDEBUG)) static void bmpregion_write(BMPREGION *region,char *filename); #endif static int height2_calc(int *rc, int n); static void trim_to(int *count, int *i1, int i2, double gaplen); static void bmpregion_analyze_justification_and_line_spacing(BMPREGION *region, BREAKINFO *breakinfo, MASTERINFO *masterinfo, int *colcount, int *rowcount, PAGEINFO *pageinfo, int allow_text_wrapping, double force_scale); static int bmpregion_is_centered(BMPREGION *region, BREAKINFO *breakinfo, int i1, int i2, int *textheight); static double median_val(double *x, int n); static void bmpregion_find_vertical_breaks(BMPREGION *region, BREAKINFO *breakinfo, int *colcount, int *rowcount, double apsize_in); static void textrow_assign_bmpregion(TEXTROW *textrow, BMPREGION *region); static void breakinfo_compute_row_gaps(BREAKINFO *breakinfo, int r2); static void breakinfo_compute_col_gaps(BREAKINFO *breakinfo, int c2); static void breakinfo_remove_small_col_gaps(BREAKINFO *breakinfo, int lcheight, double mingap); static void breakinfo_remove_small_rows(BREAKINFO *breakinfo, double fracrh, double fracgap, BMPREGION *region, int *colcount, int *rowcount); static void breakinfo_alloc(int index, BREAKINFO *breakinfo, int nrows); static void breakinfo_free(int index, BREAKINFO *breakinfo); static void breakinfo_sort_by_gap(BREAKINFO *breakinfo); static void breakinfo_sort_by_row_position(BREAKINFO *breakinfo); static void bmpregion_one_row_find_breaks(BMPREGION *region, BREAKINFO *breakinfo, int *colcount, int *rowcount, int add_to_dbase); void wrapbmp_init(void); static int wrapbmp_ends_in_hyphen(void); static void wrapbmp_set_color(int is_color); static void wrapbmp_free(void); static void wrapbmp_set_maxgap(int value); static int wrapbmp_width(void); static int wrapbmp_remaining(void); static void wrapbmp_add(BMPREGION *region, int gap, int line_spacing, int rbase, int gio, int justification_flags); static void wrapbmp_flush(MASTERINFO *masterinfo, int allow_full_justify, PAGEINFO *pageinfo, int use_bgi); static void wrapbmp_hyphen_erase(void); static void bmpregion_one_row_wrap_and_add(BMPREGION *region, BREAKINFO *breakinfo, int index, int i0, int i1, MASTERINFO *masterinfo, int justflags, int *colcount, int *rowcount, PAGEINFO *pageinfo, int rheight, int mean_row_gap, int rowbase, int marking_flags, int pi); static void white_margins(WILLUSBITMAP *src, WILLUSBITMAP *srcgrey); static void get_white_margins(BMPREGION *region); /* Bitmap orientation detection functions */ static double bitmap_orientation(WILLUSBITMAP *bmp); static double bmp_inflections_vertical(WILLUSBITMAP *srcgrey, int ndivisions, int delta, int *wthresh); static double bmp_inflections_horizontal(WILLUSBITMAP *srcgrey, int ndivisions, int delta, int *wthresh); static int inflection_count(double *x, int n, int delta, int *wthresh); static void pdfboxes_init(PDFBOXES *boxes); static void pdfboxes_free(PDFBOXES *boxes); /* static void pdfboxes_add_box(PDFBOXES *boxes,PDFBOX *box); static void pdfboxes_delete(PDFBOXES *boxes,int n); */ static void word_gaps_add(BREAKINFO *breakinfo, int lcheight, double *median_gap); static void bmp_detect_vertical_lines(WILLUSBITMAP *bmp, WILLUSBITMAP *cbmp, double dpi, double minwidth_in, double maxwidth_in, double minheight_in, double anglemax_deg, int white_thresh); static int vert_line_erase(WILLUSBITMAP *bmp, WILLUSBITMAP *cbmp, WILLUSBITMAP *tmp, int row0, int col0, double tanthx, double minheight_in, double minwidth_in, double maxwidth_in, int white_thresh); static void willus_dmem_alloc_warn(int index, void **ptr, int size, char *funcname, int exitcode); static void willus_dmem_free(int index, double **ptr, char *funcname); static int willus_mem_alloc_warn(void **ptr, int size, char *name, int exitcode); static void willus_mem_free(double **ptr, char *name); static void sortd(double *x, int n); static void sorti(int *x, int n); static void bmp_init(WILLUSBITMAP *bmap); static int bmp_alloc(WILLUSBITMAP *bmap); static void bmp_free(WILLUSBITMAP *bmap); static int bmp_copy(WILLUSBITMAP *dest, WILLUSBITMAP *src); static void bmp_fill(WILLUSBITMAP *bmp,int r,int g,int b); static int bmp_bytewidth(WILLUSBITMAP *bmp); static unsigned char *bmp_rowptr_from_top(WILLUSBITMAP *bmp, int row); static void bmp_more_rows(WILLUSBITMAP *bmp, double ratio, int pixval); static int bmp_is_grayscale(WILLUSBITMAP *bmp); static int bmp_resample(WILLUSBITMAP *dest, WILLUSBITMAP *src, double x1, double y1, double x2, double y2, int newwidth, int newheight); static void bmp_contrast_adjust(WILLUSBITMAP *dest,WILLUSBITMAP *src,double contrast); static void bmp_convert_to_greyscale_ex(WILLUSBITMAP *dst, WILLUSBITMAP *src); static int bmpmupdf_pixmap_to_bmp(WILLUSBITMAP *bmp, fz_context *ctx, fz_pixmap *pixmap); static void handle(int wait, ddjvu_context_t *ctx); static MASTERINFO _masterinfo, *masterinfo; static int master_bmp_inited = 0; static int master_bmp_width = 0; static int master_bmp_height = 0; static int max_page_width_pix = 3000; static int max_page_height_pix = 4000; static double shrink_factor = 0.9; static double zoom_value = 1.0; static double gamma_correction = 1.0; static void k2pdfopt_reflow_bmp(MASTERINFO *masterinfo, WILLUSBITMAP *src) { PAGEINFO _pageinfo, *pageinfo; WILLUSBITMAP _srcgrey, *srcgrey; int i, white, dpi; double area_ratio; masterinfo->debugfolder[0] = '\0'; white = src_whitethresh; /* Will be set by adjust_contrast() or set to src_whitethresh */ dpi = src_dpi; adjust_params_init(); set_region_widths(); srcgrey = &_srcgrey; if (master_bmp_inited == 0) { bmp_init(&masterinfo->bmp); master_bmp_inited = 1; } bmp_free(&masterinfo->bmp); bmp_init(&masterinfo->bmp); bmp_init(srcgrey); wrapbmp_init(); int ii; masterinfo->bmp.bpp = 8; for (ii = 0; ii < 256; ii++) masterinfo->bmp.red[ii] = masterinfo->bmp.blue[ii] = masterinfo->bmp.green[ii] = ii; masterinfo->rows = 0; masterinfo->bmp.width = dst_width; area_ratio = 8.5 * 11.0 * dst_dpi * dst_dpi / (dst_width * dst_height); masterinfo->bmp.height = dst_height * area_ratio * 1.5; bmp_alloc(&masterinfo->bmp); bmp_fill(&masterinfo->bmp, 255, 255, 255); BMPREGION region; bmp_copy(srcgrey, src); adjust_contrast(src, srcgrey, &white); white_margins(src, srcgrey); region.r1 = 0; region.r2 = srcgrey->height - 1; region.c1 = 0; region.c2 = srcgrey->width - 1; region.bgcolor = white; region.bmp = src; region.bmp8 = srcgrey; masterinfo->bgcolor = white; masterinfo->fit_to_page = dst_fit_to_page; /* Check to see if master bitmap might need more room */ bmpregion_multicolumn_add(®ion, masterinfo, 1, pageinfo, (int) (0.25 * src_dpi + .5)); master_bmp_width = masterinfo->bmp.width; master_bmp_height = masterinfo->rows; bmp_free(srcgrey); } void k2pdfopt_set_params(int bb_width, int bb_height, double page_margin, \ double line_space, double word_space, \ int wrapping, double contrast) { dst_userwidth = bb_width; // dst_width is adjusted in adjust_params_init dst_userheight = bb_height; vertical_line_spacing = line_space; word_spacing = word_space; text_wrap = wrapping; gamma_correction = contrast; // contrast is only used by k2pdfopt_mupdf_reflow dst_mar = page_margin; dst_martop = -1.0; dst_marbot = -1.0; dst_marleft = -1.0; dst_marright = -1.0; } void k2pdfopt_mupdf_reflow(fz_document *doc, fz_page *page, fz_context *ctx, \ double zoom, double gamma, double rot_deg) { fz_device *dev; fz_pixmap *pix; fz_rect bounds,bounds2; fz_matrix ctm; fz_bbox bbox; WILLUSBITMAP _src, *src; double dpp; double dpi = 250*zoom; do { dpp = dpi / 72.; pix = NULL; fz_var(pix); bounds = fz_bound_page(doc, page); ctm = fz_scale(dpp, dpp); // ctm=fz_concat(ctm,fz_rotate(rotation)); bounds2 = fz_transform_rect(ctm, bounds); bbox = fz_round_rect(bounds2); printf("reading page:%d,%d,%d,%d zoom:%.2f dpi:%.0f\n",bbox.x0,bbox.y0,bbox.x1,bbox.y1,zoom,dpi); zoom_value = zoom; zoom *= shrink_factor; dpi *= shrink_factor; } while (bbox.x1 > max_page_width_pix | bbox.y1 > max_page_height_pix); // ctm=fz_translate(0,-page->mediabox.y1); // ctm=fz_concat(ctm,fz_scale(dpp,-dpp)); // ctm=fz_concat(ctm,fz_rotate(page->rotate)); // ctm=fz_concat(ctm,fz_rotate(0)); // bbox=fz_round_rect(fz_transform_rect(ctm,page->mediabox)); // pix=fz_new_pixmap_with_rect(colorspace,bbox); pix = fz_new_pixmap_with_bbox(ctx, fz_device_gray, bbox); fz_clear_pixmap_with_value(ctx, pix, 0xff); dev = fz_new_draw_device(ctx, pix); #ifdef MUPDF_TRACE fz_device *tdev; fz_try(ctx) { tdev = fz_new_trace_device(ctx); fz_run_page(doc, page, tdev, ctm, NULL); } fz_always(ctx) { fz_free_device(tdev); } #endif fz_run_page(doc, page, dev, ctm, NULL); fz_free_device(dev); if(gamma_correction >= 0.0) { fz_gamma_pixmap(ctx, pix, gamma_correction); } src = &_src; masterinfo = &_masterinfo; bmp_init(src); int status = bmpmupdf_pixmap_to_bmp(src, ctx, pix); k2pdfopt_reflow_bmp(masterinfo, src); bmp_free(src); fz_drop_pixmap(ctx, pix); } void k2pdfopt_djvu_reflow(ddjvu_page_t *page, ddjvu_context_t *ctx, \ ddjvu_render_mode_t mode, ddjvu_format_t *fmt, double zoom) { WILLUSBITMAP _src, *src; ddjvu_rect_t prect; ddjvu_rect_t rrect; int i, iw, ih, idpi, status; double dpi = 250*zoom; while (!ddjvu_page_decoding_done(page)) handle(1, ctx); iw = ddjvu_page_get_width(page); ih = ddjvu_page_get_height(page); idpi = ddjvu_page_get_resolution(page); prect.x = prect.y = 0; do { prect.w = iw * dpi / idpi; prect.h = ih * dpi / idpi; printf("reading page:%d,%d,%d,%d dpi:%.0f\n",prect.x,prect.y,prect.w,prect.h,dpi); zoom_value = zoom; zoom *= shrink_factor; dpi *= shrink_factor; } while (prect.w > max_page_width_pix | prect.h > max_page_height_pix); rrect = prect; src = &_src; masterinfo = &_masterinfo; bmp_init(src); src->width = prect.w = iw * dpi / idpi; src->height = prect.h = ih * dpi / idpi; src->bpp = 8; rrect = prect; bmp_alloc(src); if (src->bpp == 8) { int ii; for (ii = 0; ii < 256; ii++) src->red[ii] = src->blue[ii] = src->green[ii] = ii; } ddjvu_format_set_row_order(fmt, 1); status = ddjvu_page_render(page, mode, &prect, &rrect, fmt, bmp_bytewidth(src), (char *) src->data); k2pdfopt_reflow_bmp(masterinfo, src); bmp_free(src); } void k2pdfopt_rfbmp_size(int *width, int *height) { *width = master_bmp_width; *height = master_bmp_height; } void k2pdfopt_rfbmp_ptr(unsigned char** bmp_ptr_ptr) { *bmp_ptr_ptr = masterinfo->bmp.data; } void k2pdfopt_rfbmp_zoom(double *zoom) { *zoom = zoom_value; } /* ansi.c */ #define MAXSIZE 8000 static int ansi_on=1; static char ansi_buffer[MAXSIZE]; int avprintf(FILE *f, char *fmt, va_list args) { int status; { if (!ansi_on) { status = vsprintf(ansi_buffer, fmt, args); ansi_parse(f, ansi_buffer); } else status = vfprintf(f, fmt, args); } return (status); } int aprintf(char *fmt, ...) { va_list args; int status; va_start(args, fmt); status = avprintf(stdout, fmt, args); va_end(args); return (status); } /* ** Ensure that max_region_width_inches will be > MIN_REGION_WIDTH_INCHES ** ** Should only be called once, after all params are set. ** */ void adjust_params_init(void) { if (dst_landscape) { dst_width = dst_userheight; dst_height = dst_userwidth; } else { dst_width = dst_userwidth; dst_height = dst_userheight; } if (dst_mar < 0.) dst_mar = 0.02; if (dst_martop < 0.) dst_martop = dst_mar; if (dst_marbot < 0.) dst_marbot = dst_mar; if (dst_marleft < 0.) dst_marleft = dst_mar; if (dst_marright < 0.) dst_marright = dst_mar; if ((double) dst_width / dst_dpi - dst_marleft - dst_marright< MIN_REGION_WIDTH_INCHES) { int olddpi; olddpi = dst_dpi; dst_dpi = (int) ((double) dst_width / (MIN_REGION_WIDTH_INCHES + dst_marleft + dst_marright)); aprintf( TTEXT_BOLD2 "Output DPI of %d is too large. Reduced to %d." TTEXT_NORMAL "\n\n", olddpi, dst_dpi); } } void set_region_widths(void) { max_region_width_inches = display_width_inches = (double) dst_width / dst_dpi; max_region_width_inches -= (dst_marleft + dst_marright); /* This is ensured by adjust_dst_dpi() as of v1.17 */ /* if (max_region_width_inches < MIN_REGION_WIDTH_INCHES) max_region_width_inches = MIN_REGION_WIDTH_INCHES; */ } /* ** Process full source page bitmap into rectangular regions and add ** to the destination bitmap. Start by looking for columns. ** ** level = recursion level. First call = 1, then 2, ... ** */ void bmpregion_multicolumn_add(BMPREGION *region, MASTERINFO *masterinfo, int level, PAGEINFO *pageinfo, int colgap0_pixels) { static char *funcname = "bmpregion_multicolumn_add"; int *row_black_count; int r2, rh, r0, cgr, maxlevel; BMPREGION *srcregion, _srcregion; BMPREGION *newregion, _newregion; BMPREGION *pageregion; double minh; int ipr, npr, na; int *colcount, *rowcount; willus_dmem_alloc_warn(1, (void **) &colcount, sizeof(int) * (region->c2 + 1), funcname, 10); willus_dmem_alloc_warn(2, (void **) &rowcount, sizeof(int) * (region->r2 + 1), funcname, 10); maxlevel = max_columns / 2; if (debug) printf("@bmpregion_multicolumn_add (%d,%d) - (%d,%d) lev=%d\n", region->c1, region->r1, region->c2, region->r2, level); newregion = &_newregion; (*newregion) = (*region); /* Establish colcount, rowcount arrays */ bmpregion_trim_margins(newregion, colcount, rowcount, 0xf); (*newregion) = (*region); srcregion = &_srcregion; (*srcregion) = (*region); /* How many page regions do we need? */ minh = min_column_height_inches; if (minh < .01) minh = .1; na = (srcregion->r2 - srcregion->r1 + 1) / src_dpi / minh; if (na < 1) na = 1; na += 16; /* Allocate page regions */ willus_dmem_alloc_warn(3, (void **) &pageregion, sizeof(BMPREGION) * na, funcname, 10); #ifdef COMMENT mindr=src_dpi*.045; /* src->height/250; */ if (mindr<1) mindr=1; #endif // white=250; // for (i=0;iwidth;i++) // colcount[i]=0; if (debug) bmpregion_row_histogram(region); /* ** Store information about which rows are mostly clear for future ** processing (saves processing time). */ willus_dmem_alloc_warn(4, (void **) &row_black_count, region->bmp8->height * sizeof(int), funcname, 10); for (cgr = 0, r0 = 0; r0 < region->bmp8->height; r0++) { row_black_count[r0] = bmpregion_row_black_count(region, r0); if (row_black_count[r0] == 0) cgr++; /* int dr; dr=mindr; if (r0+dr>region->bmp8->height) dr=region->bmp8->height-r0; if ((row_is_clear[r0]=bmpregion_row_mostly_white(region,r0,dr))!=0) cgr++; */ // printf("row_is_clear[%d]=%d\n",r0,row_is_clear[r0]); } if (verbose) printf("%d clear rows.\n", cgr); if (max_columns == 1) { pageregion[0] = (*srcregion); /* Set c1 negative to indicate full span */ pageregion[0].c1 = -1 - pageregion[0].c1; npr = 1; } else /* Find all column dividers in source region and store sequentially in pageregion[] array */ for (npr = 0, rh = 0; srcregion->r1 <= srcregion->r2; srcregion->r1 += rh) { static char *ierr = TTEXT_WARN "\n\aInternal error--not enough allocated regions.\n" "Please inform the developer at willus.com.\n\n" TTEXT_NORMAL; if (npr >= na - 3) { aprintf("%s", ierr); break; } rh = bmpregion_find_multicolumn_divider(srcregion, row_black_count, pageregion, &npr, colcount, rowcount); if (verbose) printf("rh=%d/%d\n", rh, region->r2 - region->r1 + 1); } /* Process page regions by column */ if (debug) printf("Page regions: %d\n", npr); r2 = -1; for (ipr = 0; ipr < npr;) { int r20, jpr, colnum, colgap_pixels; for (colnum = 1; colnum <= 2; colnum++) { if (debug) { printf("ipr = %d of %d...\n", ipr, npr); printf("COLUMN %d...\n", colnum); } r20 = r2; for (jpr = ipr; jpr < npr; jpr += 2) { /* If we get to a page region that spans the entire source, stop */ if (pageregion[jpr].c1 < 0) break; /* See if we should suspend this column and start displaying the next one */ if (jpr > ipr) { double cpdiff, cdiv1, cdiv2, rowgap1_in, rowgap2_in; if (column_offset_max < 0.) break; /* Did column divider move too much? */ cdiv1 = (pageregion[jpr].c2 + pageregion[jpr + 1].c1) / 2.; cdiv2 = (pageregion[jpr - 2].c2 + pageregion[jpr - 1].c1) / 2.; cpdiff = fabs( (double) (cdiv1 - cdiv2) / (srcregion->c2 - srcregion->c1 + 1)); if (cpdiff > column_offset_max) break; /* Is gap between this column region and next column region too big? */ rowgap1_in = (double) (pageregion[jpr].r1 - pageregion[jpr - 2].r2) / src_dpi; rowgap2_in = (double) (pageregion[jpr + 1].r1 - pageregion[jpr - 1].r2) / src_dpi; if (rowgap1_in > 0.28 && rowgap2_in > 0.28) break; } (*newregion) = pageregion[ src_left_to_right ? jpr + colnum - 1 : jpr + (2 - colnum)]; /* Preserve vertical gap between this region and last region */ if (r20 >= 0 && newregion->r1 - r20 >= 0) colgap_pixels = newregion->r1 - r20; else colgap_pixels = colgap0_pixels; if (level < maxlevel) bmpregion_multicolumn_add(newregion, masterinfo, level + 1, pageinfo, colgap_pixels); else { bmpregion_vertically_break(newregion, masterinfo, text_wrap, fit_columns ? -2.0 : -1.0, colcount, rowcount, pageinfo, colgap_pixels, 2 * level); } r20 = newregion->r2; } if (r20 > r2) r2 = r20; if (jpr == ipr) break; } if (jpr < npr && pageregion[jpr].c1 < 0) { if (debug) printf("SINGLE COLUMN REGION...\n"); (*newregion) = pageregion[jpr]; newregion->c1 = -1 - newregion->c1; /* dst_add_gap_src_pixels("Col level",masterinfo,newregion->r1-r2); */ colgap_pixels = newregion->r1 - r2; bmpregion_vertically_break(newregion, masterinfo, text_wrap, (fit_columns && (level > 1)) ? -2.0 : -1.0, colcount, rowcount, pageinfo, colgap_pixels, level); r2 = newregion->r2; jpr++; } ipr = jpr; } willus_dmem_free(4, (double **) &row_black_count, funcname); willus_dmem_free(3, (double **) &pageregion, funcname); willus_dmem_free(2, (double **) &rowcount, funcname); willus_dmem_free(1, (double **) &colcount, funcname); } static void fit_column_to_screen(double column_width_inches) { double text_width_pixels, lm_pixels, rm_pixels, tm_pixels, bm_pixels; if (!column_fitted) { dpi_org = dst_dpi; lm_org = dst_marleft; rm_org = dst_marright; tm_org = dst_martop; bm_org = dst_marbot; } text_width_pixels = max_region_width_inches * dst_dpi; lm_pixels = dst_marleft * dst_dpi; rm_pixels = dst_marright * dst_dpi; tm_pixels = dst_martop * dst_dpi; bm_pixels = dst_marbot * dst_dpi; dst_dpi = text_width_pixels / column_width_inches; dst_marleft = lm_pixels / dst_dpi; dst_marright = rm_pixels / dst_dpi; dst_martop = tm_pixels / dst_dpi; dst_marbot = bm_pixels / dst_dpi; set_region_widths(); column_fitted = 1; } static void restore_output_dpi(void) { if (column_fitted) { dst_dpi = dpi_org; dst_marleft = lm_org; dst_marright = rm_org; dst_martop = tm_org; dst_marbot = bm_org; set_region_widths(); } column_fitted = 0; } void adjust_contrast(WILLUSBITMAP *src, WILLUSBITMAP *srcgrey, int *white) { int i, j, tries, wc, tc, hist[256]; double contrast, rat0; WILLUSBITMAP *dst, _dst; if (debug && verbose) printf("\nAt adjust_contrast.\n"); if ((*white) <= 0) (*white) = 192; /* If contrast_max negative, use it as fixed contrast adjustment. */ if (contrast_max < 0.) { bmp_contrast_adjust(srcgrey, srcgrey, -contrast_max); if (dst_color && fabs(contrast_max + 1.0) > 1e-4) bmp_contrast_adjust(src, src, -contrast_max); return; } dst = &_dst; bmp_init(dst); wc = 0; /* Avoid compiler warning */ tc = srcgrey->width * srcgrey->height; rat0 = 0.5; /* Avoid compiler warning */ for (contrast = 1.0, tries = 0; contrast < contrast_max + .01; tries++) { if (fabs(contrast - 1.0) > 1e-4) bmp_contrast_adjust(dst, srcgrey, contrast); else bmp_copy(dst, srcgrey); /*Get bitmap histogram */ for (i = 0; i < 256; i++) hist[i] = 0; for (j = 0; j < dst->height; j++) { unsigned char *p; p = bmp_rowptr_from_top(dst, j); for (i = 0; i < dst->width; i++, p++) hist[p[0]]++; } if (tries == 0) { int h1; for (h1 = 0, j = (*white); j < 256; j++) h1 += hist[j]; rat0 = (double) h1 / tc; if (debug && verbose) printf(" rat0 = rat[%d-255]=%.4f\n", (*white), rat0); } /* Find white ratio */ /* for (wc=hist[254],j=253;j>=252;j--) if (hist[j]>wc1) wc1=hist[j]; */ for (wc = 0, j = 252; j <= 255; j++) wc += hist[j]; /* if ((double)wc/tc >= rat0*0.7 && (double)hist[255]/wc > 0.995) break; */ if (debug && verbose) printf(" %2d. Contrast=%7.2f, rat[252-255]/rat0=%.4f\n", tries + 1, contrast, (double) wc / tc / rat0); if ((double) wc / tc >= rat0 * 0.94) break; contrast *= 1.05; } if (debug) printf("Contrast=%7.2f, rat[252-255]/rat0=%.4f\n", contrast, (double) wc / tc / rat0); /* bmp_write(dst,"outc.png",stdout,100); wfile_written_info("outc.png",stdout); exit(10); */ bmp_copy(srcgrey, dst); /* Maybe don't adjust the contrast for the color bitmap? */ if (dst_color && fabs(contrast - 1.0) > 1e-4) bmp_contrast_adjust(src, src, contrast); bmp_free(dst); } static int bmpregion_row_black_count(BMPREGION *region, int r0) { unsigned char *p; int i, nc, c; p = bmp_rowptr_from_top(region->bmp8, r0) + region->c1; nc = region->c2 - region->c1 + 1; for (c = i = 0; i < nc; i++, p++) if (p[0] < region->bgcolor) c++; return (c); } /* ** Returns height of region found and divider position in (*divider_column). ** (*divider_column) is absolute position on source bitmap. ** */ static int bmpregion_find_multicolumn_divider(BMPREGION *region, int *row_black_count, BMPREGION *pageregion, int *npr, int *colcount, int *rowcount) { int itop, i, dm, middle, divider_column, min_height_pixels, mhp2, min_col_gap_pixels; BMPREGION _newregion, *newregion, column[2]; BREAKINFO *breakinfo, _breakinfo; int *rowmin, *rowmax; static char *funcname = "bmpregion_find_multicolumn_divider"; if (debug) printf("@bmpregion_find_multicolumn_divider(%d,%d)-(%d,%d)\n", region->c1, region->r1, region->c2, region->r2); breakinfo = &_breakinfo; breakinfo->textrow = NULL; breakinfo_alloc(101, breakinfo, region->r2 - region->r1 + 1); bmpregion_find_vertical_breaks(region, breakinfo, colcount, rowcount, column_row_gap_height_in); /* { printf("region (%d,%d)-(%d,%d) has %d breaks:\n",region->c1,region->r1,region->c2,region->r2,breakinfo->n); for (i=0;in;i++) printf(" Rows %d - %d\n",breakinfo->textrow[i].r1,breakinfo->textrow[i].r2); } */ newregion = &_newregion; (*newregion) = (*region); min_height_pixels = min_column_height_inches * src_dpi; /* src->height/15; */ mhp2 = min_height_pixels - 1; if (mhp2 < 0) mhp2 = 0; dm = 1 + (region->c2 - region->c1 + 1) * column_gap_range / 2.; middle = (region->c2 - region->c1 + 1) / 2; min_col_gap_pixels = (int) (min_column_gap_inches * src_dpi + .5); if (verbose) { printf("(dm=%d, width=%d, min_gap=%d)\n", dm, region->c2 - region->c1 + 1, min_col_gap_pixels); printf("Checking regions (r1=%d, r2=%d, minrh=%d)..", region->r1, region->r2, min_height_pixels); fflush(stdout); } breakinfo_sort_by_row_position(breakinfo); willus_dmem_alloc_warn(5, (void **) &rowmin, (region->c2 + 10) * 2 * sizeof(int), funcname, 10); rowmax = &rowmin[region->c2 + 10]; for (i = 0; i < region->c2 + 2; i++) { rowmin[i] = region->r2 + 2; rowmax[i] = -1; } /* Start with top-most and bottom-most regions, look for column dividers */ for (itop = 0; itop < breakinfo->n && breakinfo->textrow[itop].r1 < region->r2 + 1 - min_height_pixels; itop++) { int ibottom; for (ibottom = breakinfo->n - 1; ibottom >= itop && breakinfo->textrow[ibottom].r2 - breakinfo->textrow[itop].r1 >= min_height_pixels; ibottom--) { /* ** Look for vertical shaft of clear space that clearly demarcates ** two columns */ for (i = 0; i < dm; i++) { int foundgap, ii, c1, c2, iiopt, status; newregion->c1 = region->c1 + middle - i; /* If we've effectively already checked this shaft, move on */ if (itop >= rowmin[newregion->c1] && ibottom <= rowmax[newregion->c1]) continue; newregion->c2 = newregion->c1 + min_col_gap_pixels - 1; newregion->r1 = breakinfo->textrow[itop].r1; newregion->r2 = breakinfo->textrow[ibottom].r2; foundgap = bmpregion_is_clear(newregion, row_black_count, gtc_in); if (!foundgap && i > 0) { newregion->c1 = region->c1 + middle + i; newregion->c2 = newregion->c1 + min_col_gap_pixels - 1; foundgap = bmpregion_is_clear(newregion, row_black_count, gtc_in); } if (!foundgap) continue; /* Found a gap, but look for a better gap nearby */ c1 = newregion->c1; c2 = newregion->c2; for (iiopt = 0, ii = -min_col_gap_pixels; ii <= min_col_gap_pixels; ii++) { int newgap; newregion->c1 = c1 + ii; newregion->c2 = c2 + ii; newgap = bmpregion_is_clear(newregion, row_black_count, gtc_in); if (newgap > 0 && newgap < foundgap) { iiopt = ii; foundgap = newgap; if (newgap == 1) break; } } newregion->c1 = c1 + iiopt; /* If we've effectively already checked this shaft, move on */ if (itop >= rowmin[newregion->c1] && ibottom <= rowmax[newregion->c1]) continue; newregion->c2 = c2 + iiopt; divider_column = newregion->c1 + min_col_gap_pixels / 2; status = bmpregion_column_height_and_gap_test(column, region, breakinfo->textrow[itop].r1, breakinfo->textrow[ibottom].r2, divider_column, colcount, rowcount); /* If fails column height or gap test, mark as bad */ if (status) { if (itop < rowmin[newregion->c1]) rowmin[newregion->c1] = itop; if (ibottom > rowmax[newregion->c1]) rowmax[newregion->c1] = ibottom; } /* If right column too short, stop looking */ if (status & 2) break; if (!status) { int colheight; /* printf(" GOT COLUMN DIVIDER AT x=%d.\n",(*divider_column)); */ if (verbose) { printf("\n GOOD REGION: col gap=(%d,%d) - (%d,%d)\n" " r1=%d, r2=%d\n", newregion->c1, newregion->r1, newregion->c2, newregion->r2, breakinfo->textrow[itop].r1, breakinfo->textrow[ibottom].r2); } if (itop > 0) { /* add 1-column region */ pageregion[(*npr)] = (*region); pageregion[(*npr)].r2 = breakinfo->textrow[itop - 1].r2; if (pageregion[(*npr)].r2 > pageregion[(*npr)].bmp8->height - 1) pageregion[(*npr)].r2 = pageregion[(*npr)].bmp8->height - 1; bmpregion_trim_margins(&pageregion[(*npr)], colcount, rowcount, 0xf); /* Special flag to indicate full-width region */ pageregion[(*npr)].c1 = -1 - pageregion[(*npr)].c1; (*npr) = (*npr) + 1; } pageregion[(*npr)] = column[0]; (*npr) = (*npr) + 1; pageregion[(*npr)] = column[1]; (*npr) = (*npr) + 1; colheight = breakinfo->textrow[ibottom].r2 - region->r1 + 1; breakinfo_free(101, breakinfo); /* printf("Returning %d divider column = %d - %d\n",region->r2-region->r1+1,newregion->c1,newregion->c2); */ return (colheight); } } } } if (verbose) printf("NO GOOD REGION FOUND.\n"); pageregion[(*npr)] = (*region); bmpregion_trim_margins(&pageregion[(*npr)], colcount, rowcount, 0xf); /* Special flag to indicate full-width region */ pageregion[(*npr)].c1 = -1 - pageregion[(*npr)].c1; (*npr) = (*npr) + 1; /* (*divider_column)=region->c2+1; */ willus_dmem_free(5, (double **) &rowmin, funcname); breakinfo_free(101, breakinfo); /* printf("Returning %d\n",region->r2-region->r1+1); */ return (region->r2 - region->r1 + 1); } /* ** 1 = column 1 too short ** 2 = column 2 too short ** 3 = both too short ** 0 = both okay ** Both columns must pass height requirement. ** ** Also, if gap between columns > max_column_gap_inches, fails test. (8-31-12) ** */ static int bmpregion_column_height_and_gap_test(BMPREGION *column, BMPREGION *region, int r1, int r2, int cmid, int *colcount, int *rowcount) { int min_height_pixels, status; status = 0; min_height_pixels = min_column_height_inches * src_dpi; column[0] = (*region); column[0].r1 = r1; column[0].r2 = r2; column[0].c2 = cmid - 1; bmpregion_trim_margins(&column[0], colcount, rowcount, 0xf); /* printf(" COL1: pix=%d (%d - %d)\n",newregion->r2-newregion->r1+1,newregion->r1,newregion->r2); */ if (column[0].r2 - column[0].r1 + 1 < min_height_pixels) status |= 1; column[1] = (*region); column[1].r1 = r1; column[1].r2 = r2; column[1].c1 = cmid; column[1].c2 = region->c2; bmpregion_trim_margins(&column[1], colcount, rowcount, 0xf); /* printf(" COL2: pix=%d (%d - %d)\n",newregion->r2-newregion->r1+1,newregion->r1,newregion->r2); */ if (column[1].r2 - column[1].r1 + 1 < min_height_pixels) status |= 2; /* Make sure gap between columns is not too large */ if (max_column_gap_inches >= 0. && column[1].c1 - column[0].c2 - 1 > max_column_gap_inches * src_dpi) status |= 4; return (status); } /* ** Return 0 if there are dark pixels in the region. NZ otherwise. */ static int bmpregion_is_clear(BMPREGION *region, int *row_black_count, double gt_in) { int r, c, nc, pt; /* ** row_black_count[] doesn't necessarily match up to this particular region's columns. ** So if row_black_count[] == 0, the row is clear, otherwise it has to be counted. ** because the columns are a subset. */ /* nr=region->r2-region->r1+1; */ nc = region->c2 - region->c1 + 1; pt = (int) (gt_in * src_dpi * nc + .5); if (pt < 0) pt = 0; for (c = 0, r = region->r1; r <= region->r2; r++) { if (r < 0 || r >= region->bmp8->height) continue; if (row_black_count[r] == 0) continue; c += bmpregion_row_black_count(region, r); if (c > pt) return (0); } /* printf("(%d,%d)-(%d,%d): c=%d, pt=%d (gt_in=%g)\n", region->c1,region->r1,region->c2,region->r2,c,pt,gt_in); */ return (1 + (int) 10 * c / pt); } static void bmpregion_row_histogram(BMPREGION *region) { static char *funcname = "bmpregion_row_histogram"; WILLUSBITMAP *src; FILE *out; static int *rowcount; static int *hist; int i, j, nn; willus_dmem_alloc_warn(6, (void **) &rowcount, (region->r2 - region->r1 + 1) * sizeof(int), funcname, 10); willus_dmem_alloc_warn(7, (void **) &hist, (region->c2 - region->c1 + 1) * sizeof(int), funcname, 10); src = region->bmp8; for (j = region->r1; j <= region->r2; j++) { unsigned char *p; p = bmp_rowptr_from_top(src, j) + region->c1; rowcount[j - region->r1] = 0; for (i = region->c1; i <= region->c2; i++, p++) if (p[0] < region->bgcolor) rowcount[j - region->r1]++; } for (i = region->c1; i <= region->c2; i++) hist[i - region->c1] = 0; for (i = region->r1; i <= region->r2; i++) hist[rowcount[i - region->r1]]++; for (i = region->c2 - region->c1 + 1; i >= 0; i--) if (hist[i] > 0) break; nn = i; out = fopen("hist.ep", "w"); for (i = 0; i <= nn; i++) fprintf(out, "%5d %5d\n", i, hist[i]); fclose(out); out = fopen("rowcount.ep", "w"); for (i = 0; i < region->r2 - region->r1 + 1; i++) fprintf(out, "%5d %5d\n", i, rowcount[i]); fclose(out); willus_dmem_free(7, (double **) &hist, funcname); willus_dmem_free(6, (double **) &rowcount, funcname); } /* ** Mark the region ** mark_flags & 1 : Mark top ** mark_flags & 2 : Mark bottom ** mark_flags & 4 : Mark left ** mark_flags & 8 : Mark right ** */ static void mark_source_page(BMPREGION *region0, int caller_id, int mark_flags) { static int display_order = 0; int i, n, nn, fontsize, r, g, b, shownum; char num[16]; BMPREGION *region, _region; BMPREGION *clip, _clip; if (!show_marked_source) return; if (region0 == NULL) { display_order = 0; return; } region = &_region; (*region) = (*region0); /* Clip the region w/ignored margins */ clip = &_clip; clip->bmp = region0->bmp; get_white_margins(clip); if (region->c1 < clip->c1) region->c1 = clip->c1; if (region->c2 > clip->c2) region->c2 = clip->c2; if (region->r1 < clip->r1) region->r1 = clip->r1; if (region->r2 > clip->r2) region->r2 = clip->r2; if (region->r2 <= region->r1 || region->c2 <= region->c1) return; /* printf("@mark_source_page(display_order=%d)\n",display_order); */ if (caller_id == 1) { display_order++; shownum = 1; n = (int) (src_dpi / 60. + 0.5); if (n < 5) n = 5; r = 255; g = b = 0; } else if (caller_id == 2) { shownum = 0; n = 2; r = 0; g = 0; b = 255; } else if (caller_id == 3) { shownum = 0; n = (int) (src_dpi / 80. + 0.5); if (n < 4) n = 4; r = 0; g = 255; b = 0; } else if (caller_id == 4) { shownum = 0; n = 2; r = 255; g = 0; b = 255; } else { shownum = 0; n = 2; r = 140; g = 140; b = 140; } if (n < 2) n = 2; nn = (region->c2 + 1 - region->c1) / 2; if (n > nn) n = nn; nn = (region->r2 + 1 - region->r1) / 2; if (n > nn) n = nn; if (n < 1) n = 1; for (i = 0; i < n; i++) { int j; unsigned char *p; if (mark_flags & 1) { p = bmp_rowptr_from_top(region->marked, region->r1 + i) + region->c1 * 3; for (j = region->c1; j <= region->c2; j++, p += 3) { p[0] = r; p[1] = g; p[2] = b; } } if (mark_flags & 2) { p = bmp_rowptr_from_top(region->marked, region->r2 - i) + region->c1 * 3; for (j = region->c1; j <= region->c2; j++, p += 3) { p[0] = r; p[1] = g; p[2] = b; } } if (mark_flags & 16) /* rowbase */ { p = bmp_rowptr_from_top(region->marked, region->rowbase - i) + region->c1 * 3; for (j = region->c1; j <= region->c2; j++, p += 3) { p[0] = r; p[1] = g; p[2] = b; } } if (mark_flags & 4) for (j = region->r1; j <= region->r2; j++) { p = bmp_rowptr_from_top(region->marked, j) + (region->c1 + i) * 3; p[0] = r; p[1] = g; p[2] = b; } if (mark_flags & 8) for (j = region->r1; j <= region->r2; j++) { p = bmp_rowptr_from_top(region->marked, j) + (region->c2 - i) * 3; p[0] = r; p[1] = g; p[2] = b; } } if (!shownum) return; fontsize = region->c2 - region->c1 + 1; if (fontsize > region->r2 - region->r1 + 1) fontsize = region->r2 - region->r1 + 1; fontsize /= 2; if (fontsize > src_dpi) fontsize = src_dpi; if (fontsize < 5) return; fontrender_set_typeface("helvetica-bold"); fontrender_set_fgcolor(r, g, b); fontrender_set_bgcolor(255, 255, 255); fontrender_set_pixel_size(fontsize); fontrender_set_justification(4); fontrender_set_or(1); sprintf(num, "%d", display_order); fontrender_render(region->marked, (double) (region->c1 + region->c2) / 2., (double) (region->marked->height - ((region->r1 + region->r2) / 2.)), num, 0, NULL); /* printf(" done mark_source_page.\n"); */ } /* ** Input: A generic rectangular region from the source file. It will not ** be checked for multiple columns, but the text may be wrapped ** (controlled by allow_text_wrapping input). ** ** force_scale == -2 : Use same scale for entire column--fit to device ** ** This function looks for vertical gaps in the region and breaks it at ** the widest ones (if there are significantly wider ones). ** */ static void bmpregion_vertically_break(BMPREGION *region, MASTERINFO *masterinfo, int allow_text_wrapping, double force_scale, int *colcount, int *rowcount, PAGEINFO *pageinfo, int colgap_pixels, int ncols) { static int ncols_last = -1; int regcount, i, i1, biggap, revert, trim_flags, allow_vertical_breaks; int justification_flags, caller_id, marking_flags, rbdelta; // int trim_left_and_right; BMPREGION *bregion, _bregion; BREAKINFO *breakinfo, _breakinfo; double region_width_inches, region_height_inches; #if (WILLUSDEBUGX & 1) printf("\n\n@bmpregion_vertically_break. colgap_pixels=%d\n\n",colgap_pixels); #endif trim_flags = 0xf; allow_vertical_breaks = 1; justification_flags = 0x8f; /* Don't know region justification status yet. Use user settings. */ rbdelta = -1; breakinfo = &_breakinfo; breakinfo->textrow = NULL; breakinfo_alloc(102, breakinfo, region->r2 - region->r1 + 1); bmpregion_find_vertical_breaks(region, breakinfo, colcount, rowcount, -1.0); /* Should there be a check for breakinfo->n==0 here? */ /* Don't think it breaks anything to let it go. -- 6-11-12 */ #if (WILLUSDEBUGX & 2) breakinfo_echo(breakinfo); #endif breakinfo_remove_small_rows(breakinfo, 0.25, 0.5, region, colcount, rowcount); #if (WILLUSDEBUGX & 2) breakinfo_echo(breakinfo); #endif breakinfo->centered = bmpregion_is_centered(region, breakinfo, 0, breakinfo->n - 1, NULL); #if (WILLUSDEBUGX & 2) breakinfo_echo(breakinfo); #endif /* newregion=&_newregion; for (i=0;in;i++) { (*newregion)=(*region); newregion->r1=breakinfo->textrow[i].r1; newregion->r2=breakinfo->textrow[i].r2; bmpregion_add(newregion,breakinfo,masterinfo,allow_text_wrapping,force_scale,0,1, colcount,rowcount,pageinfo,0,0xf); } breakinfo_free(breakinfo); return; */ /* if (!vertical_breaks) { caller_id=100; marking_flags=0; bmpregion_add(region,breakinfo,masterinfo,allow_text_wrapping,trim_flags, allow_vertical_breaks,force_scale,justification_flags, caller_id,colcount,rowcount,pageinfo,marking_flags,rbdelta); breakinfo_free(breakinfo); return; } */ /* Red, numbered region */ mark_source_page(region, 1, 0xf); bregion = &_bregion; if (debug) { if (!allow_text_wrapping) printf( "@bmpregion_vertically_break (no break) (%d,%d) - (%d,%d) (scale=%g)\n", region->c1, region->r1, region->c2, region->r2, force_scale); else printf( "@bmpregion_vertically_break (allow break) (%d,%d) - (%d,%d) (scale=%g)\n", region->c1, region->r1, region->c2, region->r2, force_scale); } /* ** Tag blank rows and columns */ if (vertical_break_threshold < 0. || breakinfo->n < 6) biggap = -1.; else { int gap_median; /* int rowheight_median; breakinfo_sort_by_rowheight(breakinfo); rowheight_median = breakinfo->textrow[breakinfo->n/2].rowheight; */ #ifdef WILLUSDEBUG for (i=0;in;i++) printf(" gap[%d]=%d\n",i,breakinfo->textrow[i].gap); #endif breakinfo_sort_by_gap(breakinfo); gap_median = breakinfo->textrow[breakinfo->n / 2].gap; #ifdef WILLUSDEBUG printf(" median=%d\n",gap_median); #endif biggap = gap_median * vertical_break_threshold; breakinfo_sort_by_row_position(breakinfo); } #ifdef WILLUSDEBUG printf(" biggap=%d\n",biggap); #endif region_width_inches = (double) (region->c2 - region->c1 + 1) / src_dpi; region_height_inches = (double) (region->r2 - region->r1 + 1) / src_dpi; /* trim_left_and_right = 1; if (region_width_inches <= max_region_width_inches) trim_left_and_right = 0; */ /* printf("force_scale=%g, rwi = %g, rwi/mrwi = %g, rhi = %g\n", force_scale, region_width_inches, region_width_inches / max_region_width_inches, region_height_inches); */ if (force_scale < -1.5 && region_width_inches > MIN_REGION_WIDTH_INCHES && region_width_inches / max_region_width_inches < 1.25 && region_height_inches > 0.5) { revert = 1; force_scale = -1.0; fit_column_to_screen(region_width_inches); // trim_left_and_right = 0; allow_text_wrapping = 0; } else revert = 0; /* Add the regions (broken vertically) */ caller_id = 1; /* if (trim_left_and_right) trim_flags=0xf; else trim_flags=0xc; */ trim_flags = 0xf; for (regcount = i1 = i = 0; i1 < breakinfo->n; i++) { int i2; i2 = i < breakinfo->n ? i : breakinfo->n - 1; if (i >= breakinfo->n || (biggap > 0. && breakinfo->textrow[i2].gap >= biggap)) { int j, c1, c2, nc, nowrap; double regwidth, ar1, rh1; // printf("CALLER 1: i1=%d, i2=%d (breakinfo->n=%d)\n",i1,i2,breakinfo->n); (*bregion) = (*region); bregion->r1 = breakinfo->textrow[i1].r1; bregion->r2 = breakinfo->textrow[i2].r2; c1 = breakinfo->textrow[i1].c1; c2 = breakinfo->textrow[i1].c2; nc = c2 - c1 + 1; if (nc <= 0) nc = 1; rh1 = (double) (breakinfo->textrow[i1].r2 - breakinfo->textrow[i1].r1 + 1) / src_dpi; ar1 = (double) (breakinfo->textrow[i1].r2 - breakinfo->textrow[i1].r1 + 1) / nc; for (j = i1 + 1; j <= i2; j++) { if (c1 > breakinfo->textrow[j].c1) c1 = breakinfo->textrow[j].c1; if (c2 < breakinfo->textrow[j].c2) c2 = breakinfo->textrow[j].c2; } regwidth = (double) (c2 - c1 + 1) / src_dpi; marking_flags = (i1 == 0 ? 0 : 1) | (i2 == breakinfo->n - 1 ? 0 : 2); /* Green */ mark_source_page(bregion, 3, marking_flags); nowrap = ((regwidth <= max_region_width_inches && allow_text_wrapping < 2) || (ar1 > no_wrap_ar_limit && rh1 > no_wrap_height_limit_inches)); /* ** If between regions, or if the next region isn't going to be ** wrapped, or if the next region starts a different number of ** columns than before, then "flush and gap." */ if (regcount > 0 || just_flushed_internal || nowrap || (ncols_last > 0 && ncols_last != ncols)) { int gap; #ifdef WILLUSDEBUG printf("wrapflush1\n"); #endif if (!just_flushed_internal) wrapbmp_flush(masterinfo, 0, pageinfo, 0); gap = regcount == 0 ? colgap_pixels : breakinfo->textrow[i1 - 1].gap; if (regcount == 0 && beginning_gap_internal > 0) { if (last_h5050_internal > 0) { if (fabs( 1. - (double) breakinfo->textrow[i1].h5050 / last_h5050_internal) > .1) dst_add_gap_src_pixels("Col/Page break", masterinfo, colgap_pixels); last_h5050_internal = -1; } gap = beginning_gap_internal; beginning_gap_internal = -1; } dst_add_gap_src_pixels("Vert break", masterinfo, gap); } else { if (regcount == 0 && beginning_gap_internal < 0) beginning_gap_internal = colgap_pixels; } bmpregion_add(bregion, breakinfo, masterinfo, allow_text_wrapping, trim_flags, allow_vertical_breaks, force_scale, justification_flags, caller_id, colcount, rowcount, pageinfo, marking_flags, rbdelta); regcount++; i1 = i2 + 1; } } ncols_last = ncols; if (revert) restore_output_dpi(); breakinfo_free(102, breakinfo); } /* ** ** MAIN BITMAP REGION ADDING FUNCTION ** ** NOTE: This function calls itself recursively! ** ** Input: A generic rectangular region from the source file. It will not ** be checked for multiple columns, but the text may be wrapped ** (controlled by allow_text_wrapping input). ** ** First, excess margins are trimmed off of the region. ** ** Then, if the resulting trimmed region is wider than the max desirable width ** and allow_text_wrapping is non-zero, then the ** bmpregion_analyze_justification_and_line_spacing() function is called. ** Otherwise the region is scaled to fit and added to the master set of pages. ** ** justification_flags ** Bits 6-7: 0 = document is not fully justified ** 1 = document is fully justified ** 2 = don't know document justification yet ** Bits 4-5: 0 = Use user settings ** 1 = fully justify ** 2 = do not fully justify ** Bits 2-3: 0 = document is left justified ** 1 = document is centered ** 2 = document is right justified ** 3 = don't know document justification yet ** Bits 0-1: 0 = left justify document ** 1 = center document ** 2 = right justify document ** 3 = Use user settings ** ** force_scale = -2.0 : Fit column width to display width ** force_scale = -1.0 : Use output dpi unless the region doesn't fit. ** In that case, scale it down until it fits. ** force_scale > 0.0 : Scale region by force_scale. ** ** mark_flags & 1 : Mark top ** mark_flags & 2 : Mark bottom ** mark_flags & 4 : Mark left ** mark_flags & 8 : Mark right ** ** trim_flags & 0x80 : Do NOT re-trim no matter what. ** */ static void bmpregion_add(BMPREGION *region, BREAKINFO *breakinfo, MASTERINFO *masterinfo, int allow_text_wrapping, int trim_flags, int allow_vertical_breaks, double force_scale, int justification_flags, int caller_id, int *colcount, int *rowcount, PAGEINFO *pageinfo, int mark_flags, int rowbase_delta) { int w, wmax, i, nc, nr, h, bpp, tall_region; double region_width_inches; WILLUSBITMAP *bmp, _bmp; BMPREGION *newregion, _newregion; newregion = &_newregion; (*newregion) = (*region); #if (WILLUSDEBUGX & 1) printf("@bmpregion_add (%d,%d) - (%d,%d)\n",region->c1,region->r1,region->c2,region->r2); printf(" trimflags = %X\n",trim_flags); #endif if (debug) { if (!allow_text_wrapping) printf("@bmpregion_add (no break) (%d,%d) - (%d,%d) (scale=%g)\n", region->c1, region->r1, region->c2, region->r2, force_scale); else printf( "@bmpregion_add (allow break) (%d,%d) - (%d,%d) (scale=%g)\n", region->c1, region->r1, region->c2, region->r2, force_scale); } /* ** Tag blank rows and columns and trim the blank margins off ** trimflags = 0xf for all margin trim. ** trimflags = 0xc for just top and bottom margins. */ bmpregion_trim_margins(newregion, colcount, rowcount, trim_flags); #if (WILLUSDEBUGX & 1) printf(" After trim: (%d,%d) - (%d,%d)\n",newregion->c1,newregion->r1,newregion->c2,newregion->r2); #endif nc = newregion->c2 - newregion->c1 + 1; nr = newregion->r2 - newregion->r1 + 1; // printf("nc=%d, nr=%d\n",nc,nr); if (verbose) { printf(" row range adjusted to %d - %d\n", newregion->r1, newregion->r2); printf(" col range adjusted to %d - %d\n", newregion->c1, newregion->c2); } if (nc <= 5 || nr <= 1) return; region_width_inches = (double) nc / src_dpi; // printf("regwidth = %g in\n",region_width_inches); /* Use untrimmed region left/right if possible */ if (caller_id == 1 && region_width_inches <= max_region_width_inches) { int trimleft, trimright; int maxpix, dpix; maxpix = (int) (max_region_width_inches * src_dpi + .5); #if (WILLUSDEBUGX & 1) printf(" Trimming. C's = %4d %4d %4d %4d\n",region->c1,newregion->c1,newregion->c2,region->c2); printf(" maxpix = %d, regwidth = %d\n",maxpix,region->c2-region->c1+1); #endif if (maxpix > (region->c2 - region->c1 + 1)) maxpix = region->c2 - region->c1 + 1; // printf(" maxpix = %d\n",maxpix); dpix = (region->c2 - region->c1 + 1 - maxpix) / 2; // printf(" dpix = %d\n",dpix); trimright = region->c2 - newregion->c2; trimleft = newregion->c1 - region->c1; if (trimleft < trimright) { if (trimleft > dpix) newregion->c1 = region->c1 + dpix; newregion->c2 = newregion->c1 + maxpix - 1; } else { if (trimright > dpix) newregion->c2 = region->c2 - dpix; newregion->c1 = newregion->c2 - maxpix + 1; } if (newregion->c1 < region->c1) newregion->c1 = region->c1; if (newregion->c2 > region->c2) newregion->c2 = region->c2; nc = newregion->c2 - newregion->c1 + 1; #if (WILLUSDEBUGX & 1) printf(" Post Trim. C's = %4d %4d %4d %4d\n",region->c1,newregion->c1,newregion->c2,region->c2); #endif region_width_inches = (double) nc / src_dpi; } /* ** Try breaking the region into smaller horizontal pieces (wrap text lines) */ /* printf("allow_text_wrapping=%d, region_width_inches=%g, max_region_width_inches=%g\n", allow_text_wrapping,region_width_inches,max_region_width_inches); */ /* New in v1.50, if allow_text_wrapping==2, unwrap short lines. */ if (allow_text_wrapping == 2 || (allow_text_wrapping == 1 && region_width_inches > max_region_width_inches)) { bmpregion_analyze_justification_and_line_spacing(newregion, breakinfo, masterinfo, colcount, rowcount, pageinfo, 1, force_scale); return; } /* ** If allowed, re-submit each vertical region individually */ if (allow_vertical_breaks) { bmpregion_analyze_justification_and_line_spacing(newregion, breakinfo, masterinfo, colcount, rowcount, pageinfo, 0, force_scale); return; } /* AT THIS POINT, BITMAP IS NOT TO BE BROKEN UP HORIZONTALLY OR VERTICALLY */ /* (IT CAN STILL BE FULLY JUSTIFIED IF ALLOWED.) */ /* ** Scale region to fit the destination device width and add to the master bitmap. ** ** ** Start by copying source region to new bitmap ** */ // printf("c1=%d\n",newregion->c1); /* Is it a figure? */ tall_region = (double) (newregion->r2 - newregion->r1 + 1) / src_dpi >= dst_min_figure_height_in; /* Re-trim left and right? */ if ((trim_flags & 0x80) == 0) { /* If tall region and figure justification turned on ... */ if ((tall_region && dst_figure_justify >= 0) /* ... or if centered region ... */ || ((trim_flags & 3) != 3 && ((justification_flags & 3) == 1 || ((justification_flags & 3) == 3 && (dst_justify == 1 || (dst_justify < 0 && (justification_flags & 0xc) == 4)))))) { bmpregion_trim_margins(newregion, colcount, rowcount, 0x3); nc = newregion->c2 - newregion->c1 + 1; region_width_inches = (double) nc / src_dpi; } } #if (WILLUSDEBUGX & 1) aprintf("atomic region: " ANSI_CYAN "%.2f x %.2f in" ANSI_NORMAL " c1=%d, (%d x %d) (rbdel=%d) just=0x%02X\n", (double)(newregion->c2-newregion->c1+1)/src_dpi, (double)(newregion->r2-newregion->r1+1)/src_dpi, newregion->c1, (newregion->c2-newregion->c1+1), (newregion->r2-newregion->r1+1), rowbase_delta,justification_flags); #endif /* Copy atomic region into bmp */ bmp = &_bmp; bmp_init(bmp); bmp->width = nc; bmp->height = nr; if (dst_color) bmp->bpp = 24; else { bmp->bpp = 8; for (i = 0; i < 256; i++) bmp->red[i] = bmp->blue[i] = bmp->green[i] = i; } bmp_alloc(bmp); bpp = dst_color ? 3 : 1; // printf("r1=%d, r2=%d\n",newregion->r1,newregion->r2); for (i = newregion->r1; i <= newregion->r2; i++) { unsigned char *psrc, *pdst; pdst = bmp_rowptr_from_top(bmp, i - newregion->r1); psrc = bmp_rowptr_from_top(dst_color ? newregion->bmp : newregion->bmp8, i) + bpp * newregion->c1; memcpy(pdst, psrc, nc * bpp); } /* ** Now scale to appropriate destination size. ** ** force_scale is used to maintain uniform scaling so that ** most of the regions are scaled at the same value. ** ** force_scale = -2.0 : Fit column width to display width ** force_scale = -1.0 : Use output dpi unless the region doesn't fit. ** In that case, scale it down until it fits. ** force_scale > 0.0 : Scale region by force_scale. ** */ /* Max viewable pixel width on device screen */ wmax = (int) (masterinfo->bmp.width - (dst_marleft + dst_marright) * dst_dpi + 0.5); if (force_scale > 0.) w = (int) (force_scale * bmp->width + 0.5); else { if (region_width_inches < max_region_width_inches) w = (int) (region_width_inches * dst_dpi + .5); else w = wmax; } /* Special processing for tall regions (likely figures) */ if (tall_region && w < wmax && dst_fit_to_page != 0) { if (dst_fit_to_page < 0) w = wmax; else { w = (int) (w * (1. + (double) dst_fit_to_page / 100.) + 0.5); if (w > wmax) w = wmax; } } h = (int) (((double) w / bmp->width) * bmp->height + .5); /* ** If scaled dimensions are finite, add to master bitmap. */ if (w > 0 && h > 0) { WILLUSBITMAP *tmp, _tmp; int nocr; last_scale_factor_internal = (double) w / bmp->width; #ifdef HAVE_OCR if (dst_ocr) { nocr=(int)((double)bmp->width/w+0.5); if (nocr < 1) nocr=1; if (nocr > 10) nocr=10; w *= nocr; h *= nocr; } else #endif nocr = 1; tmp = &_tmp; bmp_init(tmp); bmp_resample(tmp, bmp, (double) 0., (double) 0., (double) bmp->width, (double) bmp->height, w, h); bmp_free(bmp); /* { static int nn=0; char filename[256]; sprintf(filename,"xxx%02d.png",nn++); bmp_write(tmp,filename,stdout,100); } */ /* ** Add scaled bitmap to destination. */ /* Allocate more rows if necessary */ while (masterinfo->rows + tmp->height / nocr > masterinfo->bmp.height) bmp_more_rows(&masterinfo->bmp, 1.4, 255); /* Check special justification for tall regions */ if (tall_region && dst_figure_justify >= 0) justification_flags = dst_figure_justify; bmp_src_to_dst(masterinfo, tmp, justification_flags, region->bgcolor, nocr, (int) ((double) src_dpi * tmp->width / bmp->width + .5)); bmp_free(tmp); } /* Store delta to base of text row (used by wrapbmp_flush()) */ last_rowbase_internal = rowbase_delta; /* .05 was .072 in v1.35 */ /* dst_add_gap(&masterinfo->bmp,&masterinfo->rows,0.05); */ /* if (revert) restore_output_dpi(); */ } static void dst_add_gap_src_pixels(char *caller, MASTERINFO *masterinfo, int pixels) { double gap_inches; /* aprintf("%s " ANSI_GREEN "dst_add" ANSI_NORMAL " %.3f in (%d pix)\n",caller,(double)pixels/src_dpi,pixels); */ if (last_scale_factor_internal < 0.) gap_inches = (double) pixels / src_dpi; else gap_inches = (double) pixels * last_scale_factor_internal / dst_dpi; gap_inches *= vertical_multiplier; if (gap_inches > max_vertical_gap_inches) gap_inches = max_vertical_gap_inches; dst_add_gap(masterinfo, gap_inches); } static void dst_add_gap(MASTERINFO *masterinfo, double inches) { int n, bw; unsigned char *p; n = (int) (inches * dst_dpi + .5); if (n < 1) n = 1; while (masterinfo->rows + n > masterinfo->bmp.height) bmp_more_rows(&masterinfo->bmp, 1.4, 255); bw = bmp_bytewidth(&masterinfo->bmp) * n; p = bmp_rowptr_from_top(&masterinfo->bmp, masterinfo->rows); memset(p, 255, bw); masterinfo->rows += n; } /* ** ** Add already-scaled source bmp to destination bmp. ** Source bmp may be narrower than destination--if so, it may be fully justifed. ** dst = destination bitmap ** src = source bitmap ** dst and src bpp must match! ** All rows of src are applied to masterinfo->bmp starting at row masterinfo->rows ** Full justification is done if requested. ** */ static void bmp_src_to_dst(MASTERINFO *masterinfo, WILLUSBITMAP *src, int justification_flags, int whitethresh, int nocr, int dpi) { WILLUSBITMAP *src1, _src1; WILLUSBITMAP *tmp; #ifdef HAVE_OCR WILLUSBITMAP _tmp; OCRWORDS _words,*words; #endif int dw, dw2; int i, srcbytespp, srcbytewidth, go_full; int destwidth, destx0, just; if (src->width <= 0 || src->height <= 0) return; /* printf("@bmp_src_to_dst. dst->bpp=%d, src->bpp=%d, src=%d x %d\n",masterinfo->bmp.bpp,src->bpp,src->width,src->height); */ /* { static int count=0; static char filename[256]; printf(" @bmp_src_to_dst...\n"); sprintf(filename,"src%05d.png",count++); bmp_write(src,filename,stdout,100); } */ /* if (fulljust && dst_fulljustify) printf("srcbytespp=%d, srcbytewidth=%d, destwidth=%d, destx0=%d, destbytewidth=%d\n", srcbytespp,srcbytewidth,destwidth,destx0,dstbytewidth); */ /* Determine what justification to use */ /* Left? */ if ((justification_flags & 3) == 0 /* Mandatory left just */ || ((justification_flags & 3) == 3 /* Use user settings */ && (dst_justify == 0 || (dst_justify < 0 && (justification_flags & 0xc) == 0)))) just = 0; else if ((justification_flags & 3) == 2 || ((justification_flags & 3) == 3 && (dst_justify == 2 || (dst_justify < 0 && (justification_flags & 0xc) == 8)))) just = 2; else just = 1; /* Full justification? */ destwidth = (int) (masterinfo->bmp.width - (dst_marleft + dst_marright) * dst_dpi + .5); go_full = (destwidth * nocr > src->width && (((justification_flags & 0x30) == 0x10) || ((justification_flags & 0x30) == 0 // Use user settings && (dst_fulljustify == 1 || (dst_fulljustify < 0 && (justification_flags & 0xc0) == 0x40))))); /* Put fully justified text into src1 bitmap */ if (go_full) { src1 = &_src1; bmp_init(src1); bmp_fully_justify(src1, src, nocr * destwidth, whitethresh, just); } else src1 = src; #if (WILLUSDEBUGX & 1) printf("@bmp_src_to_dst: jflags=0x%02X just=%d, go_full=%d\n",justification_flags,just,go_full); printf(" destx0=%d, destwidth=%d, src->width=%d\n",destx0,destwidth,src->width); #endif #ifdef HAVE_OCR if (dst_ocr) { /* Run OCR on the bitmap */ words=&_words; ocrwords_init(words); ocrwords_fill_in(words,src1,whitethresh,dpi); /* Scale bitmap and word positions to destination size */ if (nocr>1) { tmp=&_tmp; bmp_init(tmp); bmp_integer_resample(tmp,src1,nocr); ocrwords_int_scale(words,nocr); } else tmp=src1; } else #endif tmp = src1; /* printf("writing...\n"); ocrwords_box(words,tmp); bmp_write(tmp,"out.png",stdout,100); exit(10); */ destx0 = (int) (dst_marleft * dst_dpi + .5); if (just == 0) dw = destx0; else if (just == 1) dw = destx0 + (destwidth - tmp->width) / 2; else dw = destx0 + destwidth - tmp->width; if (dw < 0) dw = 0; /* Add OCR words to destination list */ #ifdef HAVE_OCR if (dst_ocr) { ocrwords_offset(words,dw,masterinfo->rows); ocrwords_concatenate(dst_ocrwords,words); ocrwords_free(words); } #endif /* Add tmp bitmap to dst */ srcbytespp = tmp->bpp == 24 ? 3 : 1; srcbytewidth = tmp->width * srcbytespp; dw2 = masterinfo->bmp.width - tmp->width - dw; dw *= srcbytespp; dw2 *= srcbytespp; for (i = 0; i < tmp->height; i++, masterinfo->rows++) { unsigned char *pdst, *psrc; psrc = bmp_rowptr_from_top(tmp, i); pdst = bmp_rowptr_from_top(&masterinfo->bmp, masterinfo->rows); memset(pdst, 255, dw); pdst += dw; memcpy(pdst, psrc, srcbytewidth); pdst += srcbytewidth; memset(pdst, 255, dw2); } #ifdef HAVE_OCR if (dst_ocr && nocr>1) bmp_free(tmp); #endif if (go_full) bmp_free(src1); } /* ** Spread words out in src and put into jbmp at scaling nocr ** In case the text can't be expanded enough, ** just=0 (left justify), 1 (center), 2 (right justify) */ static void bmp_fully_justify(WILLUSBITMAP *jbmp, WILLUSBITMAP *src, int jbmpwidth, int whitethresh, int just) { BMPREGION srcregion; BREAKINFO *colbreaks, _colbreaks; WILLUSBITMAP gray; int *gappos, *gapsize; int i, srcbytespp, srcbytewidth, jbmpbytewidth, newwidth, destx0, ng; static char *funcname = "bmp_fully_justify"; /* { char filename[256]; count++; sprintf(filename,"out%03d.png",count); bmp_write(src,filename,stdout,100); } */ /* Init/allocate destination bitmap */ jbmp->width = jbmpwidth; jbmp->height = src->height; jbmp->bpp = src->bpp; if (jbmp->bpp == 8) for (i = 0; i < 256; i++) jbmp->red[i] = jbmp->green[i] = jbmp->blue[i] = i; bmp_alloc(jbmp); /* Find breaks in the text row */ colbreaks = &_colbreaks; colbreaks->textrow = NULL; srcregion.bgcolor = whitethresh; srcregion.c1 = 0; srcregion.c2 = src->width - 1; srcregion.r1 = 0; srcregion.r2 = src->height - 1; srcbytespp = src->bpp == 24 ? 3 : 1; if (srcbytespp == 3) { srcregion.bmp = src; srcregion.bmp8 = &gray; bmp_init(srcregion.bmp8); bmp_convert_to_greyscale_ex(srcregion.bmp8, src); } else { srcregion.bmp = src; srcregion.bmp8 = src; } breakinfo_alloc(103, colbreaks, src->width); { int *colcount, *rowcount; colcount = rowcount = NULL; willus_dmem_alloc_warn(8, (void **) &colcount, sizeof(int) * (src->width + src->height), funcname, 10); rowcount = &colcount[src->width]; bmpregion_one_row_find_breaks(&srcregion, colbreaks, colcount, rowcount, 1); willus_dmem_free(8, (double **) &colcount, funcname); } if (srcbytespp == 3) bmp_free(srcregion.bmp8); ng = colbreaks->n - 1; gappos = NULL; if (ng > 0) { int maxsize, ms2, mingap, j; willus_dmem_alloc_warn(9, (void **) &gappos, (2 * sizeof(int)) * ng, funcname, 10); gapsize = &gappos[ng]; for (i = 0; i < ng; i++) { gappos[i] = colbreaks->textrow[i].c2 + 1; gapsize[i] = colbreaks->textrow[i].gap; } /* Take only the largest group of gaps */ for (maxsize = i = 0; i < ng; i++) if (maxsize < gapsize[i]) maxsize = gapsize[i]; mingap = srcregion.lcheight * word_spacing; if (mingap < 2) mingap = 2; if (maxsize > mingap) maxsize = mingap; ms2 = maxsize / 2; for (i = j = 0; i < ng; i++) if (gapsize[i] > ms2) { if (j != i) { gapsize[j] = gapsize[i]; gappos[j] = gappos[i]; } j++; } ng = j; /* Figure out total pixel expansion */ newwidth = src->width * 1.25; if (newwidth > jbmp->width) newwidth = jbmp->width; } else newwidth = src->width; breakinfo_free(103, colbreaks); /* Starting column in destination bitmap */ if (just == 1) destx0 = (jbmp->width - newwidth) / 2; else if (just == 2) destx0 = (jbmp->width - newwidth); else destx0 = 0; jbmpbytewidth = bmp_bytewidth(jbmp); srcbytewidth = bmp_bytewidth(src); /* Clear entire fully justified bitmap */ memset(bmp_rowptr_from_top(jbmp, 0), 255, jbmpbytewidth * jbmp->height); /* Spread out source pieces to fully justify them */ for (i = 0; i <= ng; i++) { int j, dx0, dx, sx0; unsigned char *pdst, *psrc; dx = i < ng ? (i > 0 ? gappos[i] - gappos[i - 1] : gappos[i] + 1) : (i > 0 ? src->width - (gappos[i - 1] + 1) : src->width); dx *= srcbytespp; sx0 = i == 0 ? 0 : (gappos[i - 1] + 1); dx0 = destx0 + sx0 + (i == 0 ? 0 : (newwidth - src->width) * i / ng); psrc = bmp_rowptr_from_top(src, 0) + sx0 * srcbytespp; pdst = bmp_rowptr_from_top(jbmp, 0) + dx0 * srcbytespp; for (j = 0; j < src->height; j++, pdst += jbmpbytewidth, psrc += srcbytewidth) memcpy(pdst, psrc, dx); } if (gappos != NULL) willus_dmem_free(9, (double **) &gappos, funcname); } /* ** flags&1 : trim c1 ** flags&2 : trim c2 ** flags&4 : trim r1 ** flags&8 : trim r2 ** flags&16 : Find rowbase, font size, etc. ** ** Row base is where row dist crosses 50% on r2 side. ** Font size is where row dist crosses 5% on other side (r1 side). ** Lowercase font size is where row dist crosses 50% on r1 side. ** ** For 12 pt font: ** Single spacing is 14.66 pts (Calibri), 13.82 pts (Times), 13.81 pts (Arial) ** Size of cap letter is 7.7 pts (Calibri), 8.1 pts (Times), 8.7 pts (Arial) ** Size of small letter is 5.7 pts (Calibri), 5.6 pts (Times), 6.5 pts (Arial) ** Mean line spacing = 1.15 - 1.22 (~1.16) ** Mean cap height = 0.68 ** Mean small letter height = 0.49 ** */ static void bmpregion_trim_margins(BMPREGION *region, int *colcount0, int *rowcount0, int flags) { int i, j, n; /* ,r1,r2,dr1,dr2,dr,vtrim,vspace; */ int *colcount, *rowcount; static char *funcname = "bmpregion_trim_margins"; /* To detect a hyphen, we need to trim and calc text base row */ if (flags & 32) flags |= 0x1f; if (colcount0 == NULL) willus_dmem_alloc_warn(10, (void **) &colcount, sizeof(int) * (region->c2 + 1), funcname, 10); else colcount = colcount0; if (rowcount0 == NULL) willus_dmem_alloc_warn(11, (void **) &rowcount, sizeof(int) * (region->r2 + 1), funcname, 10); else rowcount = rowcount0; n = region->c2 - region->c1 + 1; /* printf("Trim: reg=(%d,%d) - (%d,%d)\n",region->c1,region->r1,region->c2,region->r2); if (region->c2+1 > cca || region->r2+1 > rca) { printf("A ha 0!\n"); exit(10); } */ memset(colcount, 0, (region->c2 + 1) * sizeof(int)); memset(rowcount, 0, (region->r2 + 1) * sizeof(int)); for (j = region->r1; j <= region->r2; j++) { unsigned char *p; p = bmp_rowptr_from_top(region->bmp8, j) + region->c1; for (i = 0; i < n; i++, p++) if (p[0] < region->bgcolor) { rowcount[j]++; colcount[i + region->c1]++; } } /* ** Trim excess margins */ if (flags & 1) trim_to(colcount, ®ion->c1, region->c2, src_left_to_right ? 2.0 : 4.0); if (flags & 2) trim_to(colcount, ®ion->c2, region->c1, src_left_to_right ? 4.0 : 2.0); if (colcount0 == NULL) willus_dmem_free(10, (double **) &colcount, funcname); if (flags & 4) trim_to(rowcount, ®ion->r1, region->r2, 4.0); if (flags & 8) trim_to(rowcount, ®ion->r2, region->r1, 4.0); if (flags & 16) { int maxcount, mc2, h2; double f; maxcount = 0; for (i = region->r1; i <= region->r2; i++) if (rowcount[i] > maxcount) maxcount = rowcount[i]; mc2 = maxcount / 2; for (i = region->r2; i >= region->r1; i--) if (rowcount[i] > mc2) break; region->rowbase = i; for (i = region->r1; i <= region->r2; i++) if (rowcount[i] > mc2) break; region->h5050 = region->lcheight = region->rowbase - i + 1; mc2 = maxcount / 20; for (i = region->r1; i <= region->r2; i++) if (rowcount[i] > mc2) break; region->capheight = region->rowbase - i + 1; /* ** Sanity check capheight and lcheight */ h2 = height2_calc(&rowcount[region->r1], region->r2 - region->r1 + 1); #if (WILLUSDEBUGX & 8) if (region->c2-region->c1 > 1500) printf("reg %d x %d (%d,%d) - (%d,%d) h2=%d ch/h2=%g\n",region->c2-region->c1+1,region->r2-region->r1+1,region->c1,region->r1,region->c2,region->r2,h2,(double)region->capheight/h2); #endif if (region->capheight < h2 * 0.75) region->capheight = h2; f = (double) region->lcheight / region->capheight; if (f < 0.55) region->lcheight = (int) (0.72 * region->capheight + .5); else if (f > 0.85) region->lcheight = (int) (0.72 * region->capheight + .5); #if (WILLUSDEBUGX & 8) if (region->c2-region->c1 > 1500) printf(" lcheight final = %d\n",region->lcheight); #endif #if (WILLUSDEBUGX & 10) if (region->c2-region->c1 > 1500 && region->r2-region->r1 < 100) { static int append=0; FILE *f; int i; f=fopen("textrows.ep",append==0?"w":"a"); append=1; for (i=region->r1;i<=region->r2;i++) fprintf(f,"%d %g\n",region->rowbase-i,(double)rowcount[i]/maxcount); fprintf(f,"//nc\n"); fclose(f); } #endif } else { region->h5050 = region->r2 - region->r1 + 1; region->capheight = 0.68 * (region->r2 - region->r1 + 1); region->lcheight = 0.5 * (region->r2 - region->r1 + 1); region->rowbase = region->r2; } #if (WILLUSDEBUGX & 2) printf("trim:\n reg->c1=%d, reg->c2=%d\n",region->c1,region->c2); printf(" reg->r1=%d, reg->r2=%d, reg->rowbase=%d\n\n",region->r1,region->r2,region->rowbase); #endif if (rowcount0 == NULL) willus_dmem_free(11, (double **) &rowcount, funcname); } /* ** Does region end in a hyphen? If so, fill in HYPHENINFO structure. */ static void bmpregion_hyphen_detect(BMPREGION *region) { int i, j; /* ,r1,r2,dr1,dr2,dr,vtrim,vspace; */ int width; int *r0, *r1, *r2, *r3; int rmin, rmax, rowbytes, nrmid, rsum; int cstart, cend, cdir; unsigned char *p; static char *funcname = "bmpregion_hyphen_detect"; #if (WILLUSDEBUGX & 16) static int count=0; char pngfile[256]; FILE *out; count++; printf("@bmpregion_hyphen_detect count=%d\n",count); sprintf(pngfile,"word%04d.png",count); bmpregion_write(region,pngfile); sprintf(pngfile,"word%04d.txt",count); out=fopen(pngfile,"w"); fprintf(out,"c1=%d, c2=%d, r1=%d, r2=%d\n",region->c1,region->c2,region->r1,region->r2); fprintf(out,"lcheight=%d\n",region->lcheight); #endif region->hyphen.ch = -1; region->hyphen.c2 = -1; if (!k2_hyphen_detect) return; width = region->c2 - region->c1 + 1; if (width < 2) return; willus_dmem_alloc_warn(27, (void **) &r0, sizeof(int) * 4 * width, funcname, 10); r1 = &r0[width]; r2 = &r1[width]; r3 = &r2[width]; for (i = 0; i < width; i++) r0[i] = r1[i] = r2[i] = r3[i] = -1; rmin = region->rowbase - region->capheight - region->lcheight * .04; if (rmin < region->r1) rmin = region->r1; rmax = region->rowbase + region->lcheight * .04; if (rmax > region->r2) rmax = region->r2; rowbytes = bmp_bytewidth(region->bmp8); p = bmp_rowptr_from_top(region->bmp8, 0); nrmid = rsum = 0; if (src_left_to_right) { cstart = region->c2; cend = region->c1 - 1; cdir = -1; } else { cstart = region->c1; cend = region->c2 + 1; cdir = 1; } #if (WILLUSDEBUGX & 16) fprintf(out," j r0 r1 r2 r3\n"); #endif for (j = cstart; j != cend; j += cdir) { int r, rmid, dr, drmax; // printf("j=%d\n",j); rmid = (rmin + rmax) / 2; // printf(" rmid=%d\n",rmid); drmax = region->r2 + 1 - rmid > rmid - region->r1 + 1 ? region->r2 + 1 - rmid : rmid - region->r1 + 1; /* Find dark region closest to center line */ for (dr = 0; dr < drmax; dr++) { if (rmid + dr <= region->r2 && p[(rmid + dr) * rowbytes + j] < region->bgcolor) break; if (rmid - dr >= region->r1 && p[(rmid - dr) * rowbytes + j] < region->bgcolor) { dr = -dr; break; } } #if (WILLUSDEBUGX & 16) fprintf(out," dr=%d/%d, rmid+dr=%d, rmin=%d, rmax=%d, nrmid=%d\n",dr,drmax,rmid+dr,rmin,rmax,nrmid); #endif /* No dark detected or mark is outside hyphen region? */ /* Termination criterion #1 */ if (dr >= drmax || (nrmid > 2 && (double) nrmid / region->lcheight > .1 && (rmid + dr < rmin || rmid + dr > rmax))) { if (region->hyphen.ch >= 0 && dr >= drmax) continue; if (nrmid > 2 && (double) nrmid / region->lcheight > .35) { region->hyphen.ch = j - cdir; region->hyphen.r1 = rmin; region->hyphen.r2 = rmax; } if (dr < drmax) { region->hyphen.c2 = j; break; } continue; } if (region->hyphen.ch >= 0) { region->hyphen.c2 = j; break; } nrmid++; rmid += dr; /* Dark spot is outside expected hyphen area */ /* if (rmidrmax) { if (nrmid>0) break; continue; } */ for (r = rmid; r >= region->r1; r--) if (p[r * rowbytes + j] >= region->bgcolor) break; r1[j - region->c1] = r + 1; r0[j - region->c1] = -1; if (r >= region->r1) { for (; r >= region->r1; r--) if (p[r * rowbytes + j] < region->bgcolor) break; if (r >= region->r1) r0[j - region->c1] = r; } for (r = rmid; r <= region->r2; r++) if (p[r * rowbytes + j] >= region->bgcolor) break; r2[j - region->c1] = r - 1; r3[j - region->c1] = -1; if (r <= region->r2) { for (; r <= region->r2; r++) if (p[r * rowbytes + j] < region->bgcolor) break; if (r <= region->r2) r3[j - region->c1] = r; } #if (WILLUSDEBUGX & 16) fprintf(out," %4d %4d %4d %4d %4d\n",j,r0[j-region->c1],r1[j-region->c1],r2[j-region->c1],r3[j-region->c1]); #endif if (region->hyphen.c2 < 0 && (r0[j - region->c1] >= 0 || r3[j - region->c1] >= 0)) region->hyphen.c2 = j; /* Termination criterion #2 */ if (nrmid > 2 && (double) nrmid / region->lcheight > .35 && (r1[j - region->c1] > rmax || r2[j - region->c1] < rmin)) { region->hyphen.ch = j - cdir; region->hyphen.r1 = rmin; region->hyphen.r2 = rmax; if (region->hyphen.c2 < 0) region->hyphen.c2 = j; break; } // rc=(r1[j-region->c1]+r2[j-region->c1])/2; /* DQ possible hyphen if r1/r2 out of range */ if (nrmid > 1) { /* Too far away from last values? */ if ((double) (rmin - r1[j - region->c1]) / region->lcheight > .1 || (double) (r2[j - region->c1] - rmax) / region->lcheight > .1) break; if ((double) nrmid / region->lcheight > .1 && nrmid > 1) { if ((double) fabs(rmin - r1[j - region->c1]) / region->lcheight > .1 || (double) (rmax - r2[j - region->c1]) / region->lcheight > .1) break; } } if (nrmid == 1 || r1[j - region->c1] < rmin) rmin = r1[j - region->c1]; if (nrmid == 1 || r2[j - region->c1] > rmax) rmax = r2[j - region->c1]; if ((double) nrmid / region->lcheight > .1 && nrmid > 1) { double rmean; /* Can't be too thick */ if ((double) (rmax - rmin) / region->lcheight > .55 || (double) (rmax - rmin) / region->lcheight < .08) break; /* Must be reasonably well centered above baseline */ rmean = (double) (rmax + rmin) / 2; if ((double) (region->rowbase - rmean) / region->lcheight < 0.35 || (double) (region->rowbase - rmean) / region->lcheight > 0.85) break; if ((double) (region->rowbase - rmax) / region->lcheight < 0.2 || (double) (region->rowbase - rmin) / region->lcheight > 0.92) break; } } #if (WILLUSDEBUGX & 16) fprintf(out," ch=%d, c2=%d, r1=%d, r2=%d\n",region->hyphen.ch,region->hyphen.c2,region->hyphen.r1,region->hyphen.r2); fclose(out); #endif /* More sanity checks--better to miss a hyphen than falsely detect it. */ if (region->hyphen.ch >= 0) { double ar; /* If it's only a hyphen, then it's probably actually a dash--don't detect it. */ if (region->hyphen.c2 < 0) region->hyphen.ch = -1; /* Check aspect ratio */ ar = (double) (region->hyphen.r2 - region->hyphen.r1) / nrmid; if (ar < 0.08 || ar > 0.75) region->hyphen.ch = -1; } willus_dmem_free(27, (double **) &r0, funcname); #if (WILLUSDEBUGX & 16) if (region->hyphen.ch>=0) printf("\n\n GOT HYPHEN.\n\n"); printf(" Exiting bmpregion_hyphen_detect\n"); #endif } #if (defined(WILLUSDEBUGX) || defined(WILLUSDEBUG)) static void bmpregion_write(BMPREGION *region,char *filename) { int i,bpp; WILLUSBITMAP *bmp,_bmp; bmp=&_bmp; bmp_init(bmp); bmp->width=region->c2-region->c1+1; bmp->height=region->r2-region->r1+1; bmp->bpp=region->bmp->bpp; bpp=bmp->bpp==8?1:3; bmp_alloc(bmp); for (i=0;i<256;i++) bmp->red[i]=bmp->green[i]=bmp->blue[i]=i; for (i=0;iheight;i++) { unsigned char *s,*d; s=bmp_rowptr_from_top(region->bmp,region->r1+i)+region->c1*bpp; d=bmp_rowptr_from_top(bmp,i); memcpy(d,s,bmp->width*bpp); } bmp_write(bmp,filename,stdout,97); bmp_free(bmp); } #endif #if (WILLUSDEBUGX & 6) static void breakinfo_echo(BREAKINFO *breakinfo) { int i; printf("@breakinfo_echo...\n"); for (i=0;in;i++) printf(" %2d. r1=%4d, rowbase=%4d, r2=%4d, c1=%4d, c2=%4d\n", i+1,breakinfo->textrow[i].r1, breakinfo->textrow[i].rowbase, breakinfo->textrow[i].r2, breakinfo->textrow[i].c1, breakinfo->textrow[i].c2); } #endif /* ** Calculate weighted height of a rectangular region. ** This weighted height is intended to be close to the height of ** a capital letter, or the height of the majority of the region. ** */ static int height2_calc(int *rc, int n) { int i, thresh, i1, h2; int *c; static char *funcname = "height2_calc"; #if (WILLUSDEBUGX & 8) int cmax; #endif if (n <= 0) return (1); willus_dmem_alloc_warn(12, (void **) &c, sizeof(int) * n, funcname, 10); memcpy(c, rc, n * sizeof(int)); sorti(c, n); #if (WILLUSDEBUGX & 8) cmax=c[n-1]; #endif for (i = 0; i < n - 1 && c[i] == 0; i++) ; thresh = c[(i + n) / 3]; willus_dmem_free(12, (double **) &c, funcname); for (i = 0; i < n - 1; i++) if (rc[i] >= thresh) break; i1 = i; for (i = n - 1; i > i1; i--) if (rc[i] >= thresh) break; #if (WILLUSDEBUGX & 8) // printf("thresh = %g, i1=%d, i2=%d\n",(double)thresh/cmax,i1,i); #endif h2 = i - i1 + 1; /* Guaranteed to be >=1 */ return (h2); } static void trim_to(int *count, int *i1, int i2, double gaplen) { int del, dcount, igaplen, clevel, dlevel, defect_start, last_defect; igaplen = (int) (gaplen * src_dpi / 72.); if (igaplen < 1) igaplen = 1; /* clevel=(int)(defect_size_pts*src_dpi/72./3.); */ clevel = 0; dlevel = (int) (pow(defect_size_pts * src_dpi / 72., 2.) * PI / 4. + .5); del = i2 > (*i1) ? 1 : -1; defect_start = -1; last_defect = -1; dcount = 0; for (; (*i1) != i2; (*i1) = (*i1) + del) { if (count[(*i1)] <= clevel) { dcount = 0; /* Reset defect size */ continue; } /* Mark found */ if (dcount == 0) { if (defect_start >= 0) last_defect = defect_start; defect_start = (*i1); } dcount += count[(*i1)]; if (dcount >= dlevel) { if (last_defect >= 0 && abs(defect_start - last_defect) <= igaplen) (*i1) = last_defect; else (*i1) = defect_start; return; } } if (defect_start < 0) return; if (last_defect < 0) { (*i1) = defect_start; return; } if (abs(defect_start - last_defect) <= igaplen) (*i1) = last_defect; else (*i1) = defect_start; } /* ** A region that needs its line spacing and justification analyzed. ** ** The region may be wider than the max desirable region width. ** ** Input: breakinfo should be valid row-break information for the region. ** ** Calls bmpregion_one_row_wrap_and_add() for each text row from the ** breakinfo structure that is within the region. ** */ static void bmpregion_analyze_justification_and_line_spacing(BMPREGION *region, BREAKINFO *breakinfo, MASTERINFO *masterinfo, int *colcount, int *rowcount, PAGEINFO *pageinfo, int allow_text_wrapping, double force_scale) { int i, i1, i2, ntr, mean_row_gap, maxgap, line_spacing, nls, nch; BMPREGION *newregion, _newregion; double *id, *c1, *c2, *ch, *lch, *ls; int *just, *indented, *short_line; double capheight, lcheight, fontsize; int textheight, ragged_right, src_line_spacing; static char *funcname = "bmpregion_analyze_justification_and_line_spacing"; #if (WILLUSDEBUGX & 1) printf("@bmpregion_analyze_justification_and_line_spacing"); printf(" (%d,%d) - (%d,%d)\n",region->c1,region->r1,region->c2,region->r2); printf(" centering = %d\n",breakinfo->centered); #endif #if (WILLUSDEBUGX & 2) breakinfo_echo(breakinfo); #endif /* Locate the vertical part indices in the breakinfo structure */ newregion = &_newregion; breakinfo_sort_by_row_position(breakinfo); for (i = 0; i < breakinfo->n; i++) { TEXTROW *textrow; textrow = &breakinfo->textrow[i]; if ((textrow->r1 + textrow->r2) / 2 >= region->r1) break; } if (i >= breakinfo->n) return; i1 = i; for (; i < breakinfo->n; i++) { TEXTROW *textrow; textrow = &breakinfo->textrow[i]; if ((textrow->r1 + textrow->r2) / 2 > region->r2) break; } i2 = i - 1; if (i2 < i1) return; ntr = i2 - i1 + 1; #if (WILLUSDEBUGX & 1) printf(" i1=%d, i2=%d, ntr=%d\n",i1,i2,ntr); #endif willus_dmem_alloc_warn(13, (void **) &c1, sizeof(double) * 6 * ntr, funcname, 10); willus_dmem_alloc_warn(14, (void **) &just, sizeof(int) * 3 * ntr, funcname, 10); c2 = &c1[ntr]; ch = &c2[ntr]; lch = &ch[ntr]; ls = &lch[ntr]; id = &ls[ntr]; indented = &just[ntr]; short_line = &indented[ntr]; for (i = 0; i < ntr; i++) id[i] = i; /* Find baselines / font size */ capheight = lcheight = 0.; maxgap = -1; for (nch = nls = 0, i = i1; i <= i2; i++) { TEXTROW *textrow; double ar, rh; int marking_flags; textrow = &breakinfo->textrow[i]; c1[i - i1] = (double) textrow->c1; c2[i - i1] = (double) textrow->c2; if (i < i2 && maxgap < textrow->gap) { maxgap = textrow->gap; if (maxgap < 2) maxgap = 2; } if (textrow->c2 < textrow->c1) ar = 100.; else ar = (double) (textrow->r2 - textrow->r1 + 1) / (double) (textrow->c2 - textrow->c1 + 1); rh = (double) (textrow->r2 - textrow->r1 + 1) / src_dpi; if (i < i2 && ar <= no_wrap_ar_limit && rh <= no_wrap_height_limit_inches) ls[nls++] = breakinfo->textrow[i + 1].r1 - textrow->r1; if (ar <= no_wrap_ar_limit && rh <= no_wrap_height_limit_inches) { ch[nch] = textrow->capheight; lch[nch] = textrow->lcheight; nch++; } /* Mark region w/gray, mark rowbase also */ marking_flags = (i == i1 ? 0 : 1) | (i == i2 ? 0 : 2); if (i < i2 || textrow->r2 - textrow->rowbase > 1) marking_flags |= 0x10; (*newregion) = (*region); newregion->r1 = textrow->r1; newregion->r2 = textrow->r2; newregion->c1 = textrow->c1; newregion->c2 = textrow->c2; newregion->rowbase = textrow->rowbase; mark_source_page(newregion, 5, marking_flags); #if (WILLUSDEBUGX & 1) printf(" Row %2d: (%4d,%4d) - (%4d,%4d) rowbase=%4d, lch=%d, h5050=%d, rh=%d\n",i-i1+1,textrow->c1,textrow->r1,textrow->c2,textrow->r2,textrow->rowbase,textrow->lcheight,textrow->h5050,textrow->rowheight); #endif } wrapbmp_set_maxgap(maxgap); if (nch < 1) capheight = lcheight = 2; // Err on the side of too small else { capheight = median_val(ch, nch); lcheight = median_val(lch, nch); } // printf("capheight = %g, lcheight = %g\n",capheight,lcheight); bmpregion_is_centered(region, breakinfo, i1, i2, &textheight); /* ** For 12 pt font: ** Single spacing is 14.66 pts (Calibri), 13.82 pts (Times), 13.81 pts (Arial) ** Size of cap letter is 7.7 pts (Calibri), 8.1 pts (Times), 8.7 pts (Arial) ** Size of small letter is 5.7 pts (Calibri), 5.6 pts (Times), 6.5 pts (Arial) ** Mean line spacing = 1.15 - 1.22 (~1.16) ** Mean cap height = 0.68 ** Mean small letter height = 0.49 */ fontsize = (capheight + lcheight) / 1.17; // printf("font size = %g pts.\n",(fontsize/src_dpi)*72.); /* ** Set line spacing for this region */ if (nls > 0) src_line_spacing = median_val(ls, nls); else src_line_spacing = fontsize * 1.2; if (vertical_line_spacing < 0 && src_line_spacing <= fabs(vertical_line_spacing) * fontsize * 1.16) line_spacing = src_line_spacing; else line_spacing = fabs(vertical_line_spacing) * fontsize * 1.16; #if (WILLUSDEBUGX & 1) printf(" font size = %.2f pts = %d pixels\n",(fontsize/src_dpi)*72.,(int)(fontsize+.5)); printf(" src_line_spacing = %d, line_spacing = %d\n",src_line_spacing,line_spacing); #endif /* if (ntr==1) rheight= (int)((breakinfo->textrow[i1].r2 - breakinfo->textrow[i1].r1)*1.25+.5); else rheight = (int)((double)(breakinfo->textrow[i2].rowbase - breakinfo->textrow[i1].rowbase)/(ntr-1)+.5); */ mean_row_gap = line_spacing - textheight; if (mean_row_gap <= 1) mean_row_gap = 1; /* Try to figure out if we have a ragged right edge */ if (ntr < 3) ragged_right = 1; else { int flushcount; if (src_left_to_right) { for (flushcount = i = 0; i < ntr; i++) { #if (WILLUSDEBUGX & 1) printf(" flush_factors[%d] = %g (<.5), %g in (<.1)\n", i,(double)(region->c2-c2[i])/textheight,(double)(region->c2-c2[i])/src_dpi); #endif if ((double) (region->c2 - c2[i]) / textheight < 0.5 && (double) (region->c2 - c2[i]) / src_dpi < 0.1) flushcount++; } } else { for (flushcount = i = 0; i < ntr; i++) { #if (WILLUSDEBUGX & 1) printf(" flush_factors[%d] = %g (<.5), %g in (<.1)\n", i,(double)(c1[i]-region->c1)/textheight,(double)(c1[i]-region->c1)/src_dpi); #endif if ((double) (c1[i] - region->c1) / textheight < 0.5 && (double) (c1[i] - region->c1) / src_dpi < 0.1) flushcount++; } } ragged_right = (flushcount <= ntr / 2); /* if (src_left_to_right) { sortxyd(c2,id,ntr); del = region->c2 - c2[ntr-1-ntr/3]; sortxyd(id,c2,ntr); } else { sortxyd(c1,id,ntr); del = c1[ntr/3] - region->c1; sortxyd(id,c1,ntr); } del /= textheight; printf("del=%g\n",del); ragged_right = (del > 0.5); */ } #if (WILLUSDEBUGX & 1) printf("ragged_right=%d\n",ragged_right); #endif /* Store justification and other info line by line */ for (i = i1; i <= i2; i++) { double indent1, del; double i1f, ilfi, i2f, ilf, ifmin, dif; int centered; TEXTROW *textrow; textrow = &breakinfo->textrow[i]; i1f = (double) (c1[i - i1] - region->c1) / (region->c2 - region->c1 + 1); i2f = (double) (region->c2 - c2[i - i1]) / (region->c2 - region->c1 + 1); ilf = src_left_to_right ? i1f : i2f; ilfi = ilf * (region->c2 - region->c1 + 1) / src_dpi; /* Indent in inches */ ifmin = i1f < i2f ? i1f : i2f; dif = fabs(i1f - i2f); if (ifmin < .01) ifmin = 0.01; if (src_left_to_right) indent1 = (double) (c1[i - i1] - region->c1) / textheight; else indent1 = (double) (region->c2 - c2[i - i1]) / textheight; // printf(" row %2d: indent1=%g\n",i-i1,indent1); if (!breakinfo->centered) { indented[i - i1] = (indent1 > 0.5 && ilfi < 1.2 && ilf < .25); centered = (!indented[i - i1] && indent1 > 1.0 && dif / ifmin < 0.5); } else { centered = (dif < 0.1 || dif / ifmin < 0.5); indented[i - i1] = (indent1 > 0.5 && ilfi < 1.2 && ilf < .25 && !centered); } #if (WILLUSDEBUGX & 1) printf("Indent %d: %d. indent1=%g, ilf=%g, centered=%d\n",i-i1+1,indented[i-i1],indent1,ilf,centered); printf(" indent1=%g, i1f=%g, i2f=%g\n",indent1,i1f,i2f); #endif if (centered) just[i - i1] = 4; else { /* ** The .01 favors left justification over right justification in ** close cases. */ if (src_left_to_right) just[i - i1] = indented[i - i1] || (i1f < i2f + .01) ? 0 : 8; else just[i - i1] = indented[i - i1] || (i2f < i1f + .01) ? 8 : 0; } if (src_left_to_right) del = (double) (region->c2 - textrow->c2); else del = (double) (textrow->c1 - region->c1); /* Should we keep wrapping after this line? */ if (!ragged_right) short_line[i - i1] = (del / textheight > 0.5); else short_line[i - i1] = (del / (region->c2 - region->c1) > 0.25); /* If this row is a bigger/smaller row (font) than the next row, don't wrap. */ if (!short_line[i - i1] && i < i2) { TEXTROW *t1; t1 = &breakinfo->textrow[i + 1]; if ((textrow->h5050 > t1->h5050 * 1.5 || textrow->h5050 * 1.5 < t1->h5050) && (i == 0 || (i > 0 && (textrow->rowheight > t1->rowheight * 1.5 || textrow->rowheight * 1.5 < t1->rowheight)))) short_line[i - i1] = 1; } if (!ragged_right) just[i - i1] |= 0x40; #if (WILLUSDEBUGX & 1) printf(" just[%d]=0x%02X, shortline[%d]=%d\n",i-i1,just[i-i1],i-i1,short_line[i-i1]); printf(" textrow->c2=%d, region->c2=%d, del=%g, textheight=%d\n",textrow->c2,region->c2,del,textheight); #endif /* If short line, it should still be fully justified if it is wrapped. */ /* if (short_line[i-i1]) just[i-i1] = (just[i-i1]&0xf)|0x60; */ } /* { double mean1,mean2,stdev1,stdev2; array_mean(c1,ntr,&mean1,&stdev1); array_mean(c2,ntr,&mean2,&stdev2); printf("Mean c1, c2 = %g, %g; stddevs = %g, %g\n",mean1,mean2,stdev1,stdev2); printf("textheight = %d, line_spacing = %d\n",textheight,line_spacing); } */ for (i = i1; i <= i2; i++) { TEXTROW *textrow; int justflags, trimflags, centered, marking_flags, gap; #if (WILLUSDEBUGX & 1) aprintf("Row " ANSI_YELLOW "%d of %d" ANSI_NORMAL " (wrap=%d)\n",i-i1+1,i2-i1+1,allow_text_wrapping); #endif textrow = &breakinfo->textrow[i]; (*newregion) = (*region); newregion->r1 = textrow->r1; newregion->r2 = textrow->r2; /* The |3 tells it to use the user settings for left/right/center */ justflags = just[i - i1] | 0x3; centered = ((justflags & 0xc) == 4); #if (WILLUSDEBUGX & 1) printf(" justflags[%d]=0x%2X, centered=%d, indented=%d\n",i-i1,justflags,centered,indented[i-i1]); #endif if (allow_text_wrapping) { /* If this line is indented or if the justification has changed, */ /* then start a new line. */ if (centered || indented[i - i1] || (i > i1 && (just[i - i1] & 0xc) != (just[i - i1 - 1] & 0xc))) { #ifdef WILLUSDEBUG printf("wrapflush4\n"); #endif wrapbmp_flush(masterinfo, 0, pageinfo, 1); } #ifdef WILLUSDEBUG printf(" c1=%d, c2=%d\n",newregion->c1,newregion->c2); #endif marking_flags = 0xc | (i == i1 ? 0 : 1) | (i == i2 ? 0 : 2); bmpregion_one_row_wrap_and_add(newregion, breakinfo, i, i1, i2, masterinfo, justflags, colcount, rowcount, pageinfo, line_spacing, mean_row_gap, textrow->rowbase, marking_flags, indented[i - i1]); if (centered || short_line[i - i1]) { #ifdef WILLUSDEBUG printf("wrapflush5\n"); #endif wrapbmp_flush(masterinfo, 0, pageinfo, 2); } continue; } #ifdef WILLUSDEBUG printf("wrapflush5a\n"); #endif wrapbmp_flush(masterinfo, 0, pageinfo, 1); /* If default justifications, ignore all analysis and just center it. */ if (dst_justify < 0 && dst_fulljustify < 0) { newregion->c1 = region->c1; newregion->c2 = region->c2; justflags = 0xad; /* Force centered region, no justification */ trimflags = 0x80; } else trimflags = 0; /* No wrapping: text wrap, trim flags, vert breaks, fscale, just */ bmpregion_add(newregion, breakinfo, masterinfo, 0, trimflags, 0, force_scale, justflags, 5, colcount, rowcount, pageinfo, 0, textrow->r2 - textrow->rowbase); if (vertical_line_spacing < 0) { int gap1; gap1 = line_spacing - (textrow->r2 - textrow->r1 + 1); if (i < i2) gap = textrow->gap > gap1 ? gap1 : textrow->gap; else { gap = textrow->rowheight - (textrow->rowbase + last_rowbase_internal); if (gap < mean_row_gap / 2.) gap = mean_row_gap; } } else { gap = line_spacing - (textrow->r2 - textrow->r1 + 1); if (gap < mean_row_gap / 2.) gap = mean_row_gap; } if (i < i2) dst_add_gap_src_pixels("No-wrap line", masterinfo, gap); else { last_h5050_internal = textrow->h5050; beginning_gap_internal = gap; } } willus_dmem_free(14, (double **) &just, funcname); willus_dmem_free(13, (double **) &c1, funcname); #ifdef WILLUSDEBUG printf("Done wrap_and_add.\n"); #endif } static int bmpregion_is_centered(BMPREGION *region, BREAKINFO *breakinfo, int i1, int i2, int *th) { int j, i, cc, n1, ntr; int textheight; #if (WILLUSDEBUGX & 1) printf("@bmpregion_is_centered: region=(%d,%d) - (%d,%d)\n",region->c1,region->r1,region->c2,region->r2); printf(" nrows = %d\n",i2-i1+1); #endif ntr = i2 - i1 + 1; for (j = 0; j < 3; j++) { for (n1 = textheight = 0, i = i1; i <= i2; i++) { TEXTROW *textrow; double ar, rh; textrow = &breakinfo->textrow[i]; if (textrow->c2 < textrow->c1) ar = 100.; else ar = (double) (textrow->r2 - textrow->r1 + 1) / (double) (textrow->c2 - textrow->c1 + 1); rh = (double) (textrow->r2 - textrow->r1 + 1) / src_dpi; if (j == 2 || (j >= 1 && rh <= no_wrap_height_limit_inches) || (j == 0 && rh <= no_wrap_height_limit_inches && ar <= no_wrap_ar_limit)) { textheight += textrow->rowbase - textrow->r1 + 1; n1++; } } if (n1 > 0) break; } textheight = (int) ((double) textheight / n1 + .5); if (th != NULL) { (*th) = textheight; #if (WILLUSDEBUGX & 1) printf(" textheight assigned (%d)\n",textheight); #endif return (breakinfo->centered); } /* ** Does region appear to be centered? */ for (cc = 0, i = i1; i <= i2; i++) { double indent1, indent2; #if (WILLUSDEBUGX & 1) printf(" tr[%d].c1,c2 = %d, %d\n",i,breakinfo->textrow[i].c1,breakinfo->textrow[i].c2); #endif indent1 = (double) (breakinfo->textrow[i].c1 - region->c1) / textheight; indent2 = (double) (region->c2 - breakinfo->textrow[i].c2) / textheight; #if (WILLUSDEBUGX & 1) printf(" tr[%d].indent1,2 = %g, %g\n",i,indent1,indent2); #endif /* If only one line and it spans the entire region, call it centered */ /* Sometimes this won't be the right thing to to. */ if (i1 == i2 && indent1 < .5 && indent2 < .5) { #if (WILLUSDEBUGX & 1) printf(" One line default to bigger region (%s).\n",breakinfo->centered?"not centered":"centered"); #endif return (1); } if (fabs(indent1 - indent2) > 1.5) { #if (WILLUSDEBUGX & 1) printf(" Region not centered.\n"); #endif return (0); } if (indent1 > 1.0) cc++; } #if (WILLUSDEBUGX & 1) printf("Region centering: i=%d, i2=%d, cc=%d, ntr=%d\n",i,i2,cc,ntr); #endif if (cc > ntr / 2) { #if (WILLUSDEBUGX & 1) printf(" Region is centered (enough obviously centered lines).\n"); #endif return (1); } #if (WILLUSDEBUGX & 1) printf(" Not centered (not enough obviously centered lines).\n"); #endif return (0); } /* array.c */ /* ** ** Compute mean and standard deviation ** */ double array_mean(double *a, int n, double *mean, double *stddev) { int i; double sum, avg, sum_sq; if (n < 1) return (0.); for (sum = sum_sq = i = 0; i < n; i++) sum += a[i]; avg = sum / n; if (mean != NULL) (*mean) = avg; if (stddev != NULL) { double sum_sq; for (sum_sq = i = 0; i < n; i++) sum_sq += (a[i] - avg) * (a[i] - avg); (*stddev) = sqrt(sum_sq / n); } return (avg); } /* ** CAUTION: This function re-orders the x[] array! */ static double median_val(double *x, int n) { int i1, n1; if (n < 4) return (array_mean(x, n, NULL, NULL)); sortd(x, n); if (n == 4) { n1 = 2; i1 = 1; } else if (n == 5) { n1 = 3; i1 = 1; } else { n1 = n / 3; i1 = (n - n1) / 2; } return (array_mean(&x[i1], n1, NULL, NULL)); } /* ** ** Searches the region for vertical break points and stores them into ** the BREAKINFO structure. ** ** apsize_in = averaging aperture size in inches. Use -1 for dynamic aperture. ** */ static void bmpregion_find_vertical_breaks(BMPREGION *region, BREAKINFO *breakinfo, int *colcount, int *rowcount, double apsize_in) { static char *funcname = "bmpregion_find_vertical_breaks"; int nr, i, brc, brcmin, dtrc, trc, aperture, aperturemax, figrow, labelrow; int ntr, rhmin_pix; BMPREGION *newregion, _newregion; int *rowthresh; double min_fig_height, max_fig_gap, max_label_height; min_fig_height = dst_min_figure_height_in; max_fig_gap = 0.16; max_label_height = 0.5; /* Trim region and populate colcount/rowcount arrays */ bmpregion_trim_margins(region, colcount, rowcount, 0xf); newregion = &_newregion; (*newregion) = (*region); if (debug) printf("@bmpregion_find_vertical_breaks: (%d,%d) - (%d,%d)\n", region->c1, region->r1, region->c2, region->r2); /* ** brc = consecutive blank pixel rows ** trc = consecutive non-blank pixel rows ** dtrc = number of non blank pixel rows since last dump */ nr = region->r2 - region->r1 + 1; willus_dmem_alloc_warn(15, (void **) &rowthresh, sizeof(int) * nr, funcname, 10); brcmin = max_vertical_gap_inches * src_dpi; aperturemax = (int) (src_dpi / 72. + .5); if (aperturemax < 2) aperturemax = 2; aperture = (int) (src_dpi * apsize_in + .5); /* for (i=region->r1;i<=region->r2;i++) printf("rowcount[%d]=%d\n",i,rowcount[i]); */ breakinfo->rhmean_pixels = 0; // Mean text row height ntr = 0; // Number of text rows /* Fill rowthresh[] array */ for (dtrc = 0, i = region->r1; i <= region->r2; i++) { int ii, i1, i2, sum, pt; if (apsize_in < 0.) { aperture = (int) (dtrc / 13.7 + .5); if (aperture > aperturemax) aperture = aperturemax; if (aperture < 2) aperture = 2; } i1 = i - aperture / 2; i2 = i1 + aperture - 1; if (i1 < region->r1) i1 = region->r1; if (i2 > region->r2) i2 = region->r2; pt = (int) ((i2 - i1 + 1) * gtr_in * src_dpi + .5); /* pixel count threshold */ if (pt < 1) pt = 1; /* Sum over row aperture */ for (sum = 0, ii = i1; ii <= i2; sum += rowcount[ii], ii++) ; /* Does row have few enough black pixels to be considered blank? */ if ((rowthresh[i - region->r1] = 10 * sum / pt) <= 40) { if (dtrc > 0) { breakinfo->rhmean_pixels += dtrc; ntr++; } dtrc = 0; } else dtrc++; } if (dtrc > 0) { breakinfo->rhmean_pixels += dtrc; ntr++; } if (ntr > 0) breakinfo->rhmean_pixels /= ntr; /* printf("rhmean=%d (ntr=%d)\n",breakinfo->rhmean_pixels,ntr); { FILE *f; static int count=0; f=fopen("rthresh.ep",count==0?"w":"a"); count++; for (i=region->r1;i<=region->r2;i++) nprintf(f,"%d\n",rowthresh[i-region->r1]); nprintf(f,"//nc\n"); fclose(f); } */ /* Minimum text row height required (pixels) */ rhmin_pix = breakinfo->rhmean_pixels / 3; if (rhmin_pix < .04 * src_dpi) rhmin_pix = .04 * src_dpi; if (rhmin_pix > .13 * src_dpi) rhmin_pix = .13 * src_dpi; if (rhmin_pix < 1) rhmin_pix = 1; /* for (rmax=region->r2;rmax>region->r1;rmax--) if (rowthresh[rmax-region->r1]>10) break; */ /* Look for "row" gaps in the region so that it can be broken into */ /* multiple "rows". */ breakinfo->n = 0; for (labelrow = figrow = -1, dtrc = trc = brc = 0, i = region->r1; i <= region->r2; i++) { /* Does row have few enough black pixels to be considered blank? */ if (rowthresh[i - region->r1] <= 10) { trc = 0; brc++; /* ** Max allowed white space between rows = max_vertical_gap_inches */ if (dtrc == 0) { if (brc > brcmin) newregion->r1++; continue; } /* ** Big enough blank gap, so add one row / line */ if (dtrc + brc >= rhmin_pix) { int i0, iopt; double region_height_inches; double gap_inches; if (dtrc < src_dpi * 0.02) dtrc = src_dpi * 0.02; if (dtrc < 2) dtrc = 2; /* Look for more optimum point */ for (i0 = iopt = i; i <= region->r2 && i - i0 < dtrc; i++) { if (rowthresh[i - region->r1] < rowthresh[iopt - region->r1]) { iopt = i; if (rowthresh[i - region->r1] == 0) break; } if (rowthresh[i - region->r1] > 100) break; } /* If at end of region and haven't found perfect break, stay at end */ if (i > region->r2 && rowthresh[iopt - region->r1] > 0) i = region->r2; else i = iopt; newregion->r2 = i - 1; region_height_inches = (double) (newregion->r2 - newregion->r1 + 1) / src_dpi; /* Could this region be a figure? */ if (figrow < 0 && region_height_inches >= min_fig_height) { /* If so, set figrow and don't process it yet. */ figrow = newregion->r1; labelrow = -1; newregion->r1 = i; dtrc = trc = 0; brc = 1; continue; } /* Are we processing a figure? */ if (figrow >= 0) { /* Compute most recent gap */ if (labelrow >= 0) gap_inches = (double) (labelrow - newregion->r1) / src_dpi; else gap_inches = -1.; /* If gap and region height are small enough, tack them on to the figure. */ if (region_height_inches < max_label_height && gap_inches > 0. && gap_inches < max_fig_gap) newregion->r1 = figrow; else { /* Not small enough--dump the previous figure. */ newregion->r2 = newregion->r1 - 1; newregion->r1 = figrow; newregion->c1 = region->c1; newregion->c2 = region->c2; bmpregion_trim_margins(newregion, colcount, rowcount, 0x1f); if (newregion->r2 > newregion->r1) textrow_assign_bmpregion( &breakinfo->textrow[breakinfo->n++], newregion); if (gap_inches > 0. && gap_inches < max_fig_gap) { /* This new region might be a figure--set it as the new figure */ /* and don't dump it yet. */ figrow = newregion->r2 + 1; labelrow = -1; newregion->r1 = i; dtrc = trc = 0; brc = 1; continue; } else { newregion->r1 = newregion->r2 + 1; newregion->r2 = i - 1; } } /* Cancel figure processing */ figrow = -1; labelrow = -1; } /* if (newregion->r2 >= rmax) i=newregion->r2=region->r2; */ newregion->c1 = region->c1; newregion->c2 = region->c2; bmpregion_trim_margins(newregion, colcount, rowcount, 0x1f); if (newregion->r2 > newregion->r1) textrow_assign_bmpregion( &breakinfo->textrow[breakinfo->n++], newregion); newregion->r1 = i; dtrc = trc = 0; brc = 1; } } else { if (figrow >= 0 && labelrow < 0) labelrow = i; dtrc++; trc++; brc = 0; } } newregion->r2 = region->r2; if (dtrc > 0 && newregion->r2 - newregion->r1 + 1 > 0) { /* If we were processing a figure, include it. */ if (figrow >= 0) newregion->r1 = figrow; newregion->c1 = region->c1; newregion->c2 = region->c2; bmpregion_trim_margins(newregion, colcount, rowcount, 0x1f); if (newregion->r2 > newregion->r1) textrow_assign_bmpregion(&breakinfo->textrow[breakinfo->n++], newregion); } /* Compute gaps between rows and row heights */ breakinfo_compute_row_gaps(breakinfo, region->r2); willus_dmem_free(15, (double **) &rowthresh, funcname); } static void textrow_assign_bmpregion(TEXTROW *textrow, BMPREGION *region) { textrow->r1 = region->r1; textrow->r2 = region->r2; textrow->c1 = region->c1; textrow->c2 = region->c2; textrow->rowbase = region->rowbase; textrow->lcheight = region->lcheight; textrow->capheight = region->capheight; textrow->h5050 = region->h5050; } static void breakinfo_compute_row_gaps(BREAKINFO *breakinfo, int r2) { int i, n; n = breakinfo->n; if (n <= 0) return; breakinfo->textrow[0].rowheight = breakinfo->textrow[0].r2 - breakinfo->textrow[0].r1; for (i = 0; i < n - 1; i++) breakinfo->textrow[i].gap = breakinfo->textrow[i + 1].r1 - breakinfo->textrow[i].rowbase - 1; /* breakinfo->textrow[i].rowheight = breakinfo->textrow[i+1].r1 - breakinfo->textrow[i].r1; */ for (i = 1; i < n; i++) breakinfo->textrow[i].rowheight = breakinfo->textrow[i].rowbase - breakinfo->textrow[i - 1].rowbase; breakinfo->textrow[n - 1].gap = r2 - breakinfo->textrow[n - 1].rowbase; } static void breakinfo_compute_col_gaps(BREAKINFO *breakinfo, int c2) { int i, n; n = breakinfo->n; if (n <= 0) return; for (i = 0; i < n - 1; i++) { breakinfo->textrow[i].gap = breakinfo->textrow[i + 1].c1 - breakinfo->textrow[i].c2 - 1; breakinfo->textrow[i].rowheight = breakinfo->textrow[i + 1].c1 - breakinfo->textrow[i].c1; } breakinfo->textrow[n - 1].gap = c2 - breakinfo->textrow[n - 1].c2; breakinfo->textrow[n - 1].rowheight = breakinfo->textrow[n - 1].c2 - breakinfo->textrow[n - 1].c1; } static void breakinfo_remove_small_col_gaps(BREAKINFO *breakinfo, int lcheight, double mingap) { int i, j; if (mingap < word_spacing) mingap = word_spacing; for (i = 0; i < breakinfo->n - 1; i++) { double gap; gap = (double) breakinfo->textrow[i].gap / lcheight; if (gap >= mingap) continue; breakinfo->textrow[i].c2 = breakinfo->textrow[i + 1].c2; breakinfo->textrow[i].gap = breakinfo->textrow[i + 1].gap; if (breakinfo->textrow[i + 1].r1 < breakinfo->textrow[i].r1) breakinfo->textrow[i].r1 = breakinfo->textrow[i + 1].r1; if (breakinfo->textrow[i + 1].r2 > breakinfo->textrow[i].r2) breakinfo->textrow[i].r2 = breakinfo->textrow[i + 1].r2; for (j = i + 1; j < breakinfo->n - 1; j++) breakinfo->textrow[j] = breakinfo->textrow[j + 1]; breakinfo->n--; i--; } } static void breakinfo_remove_small_rows(BREAKINFO *breakinfo, double fracrh, double fracgap, BMPREGION *region, int *colcount, int *rowcount) { int i, j, mg, mh, mg0, mg1; int c1, c2, nc; int *rh, *gap; static char *funcname = "breakinfo_remove_small_rows"; #if (WILLUSDEBUGX & 2) printf("@breakinfo_remove_small_rows(fracrh=%g,fracgap=%g)\n",fracrh,fracgap); #endif if (breakinfo->n < 2) return; c1 = region->c1; c2 = region->c2; nc = c2 - c1 + 1; willus_dmem_alloc_warn(16, (void **) &rh, 2 * sizeof(int) * breakinfo->n, funcname, 10); gap = &rh[breakinfo->n]; for (i = 0; i < breakinfo->n; i++) { rh[i] = breakinfo->textrow[i].r2 - breakinfo->textrow[i].r1 + 1; if (i < breakinfo->n - 1) gap[i] = breakinfo->textrow[i].gap; } sorti(rh, breakinfo->n); sorti(gap, breakinfo->n - 1); mh = rh[breakinfo->n / 2]; mh *= fracrh; if (mh < 1) mh = 1; mg0 = gap[(breakinfo->n - 1) / 2]; mg = mg0 * fracgap; mg1 = mg0 * 0.7; if (mg < 1) mg = 1; #if (WILLUSDEBUGX & 2) printf("mh = %d x %g = %d\n",rh[breakinfo->n/2],fracrh,mh); printf("mg = %d x %g = %d\n",gap[breakinfo->n/2],fracgap,mg); #endif for (i = 0; i < breakinfo->n; i++) { TEXTROW *textrow; int trh, gs1, gs2, g1, g2, gap_is_big, row_too_small; double m1, m2, row_width_inches; textrow = &breakinfo->textrow[i]; trh = textrow->r2 - textrow->r1 + 1; if (i == 0) { g1 = mg0 + 1; gs1 = mg + 1; } else { g1 = textrow->r1 - breakinfo->textrow[i - 1].r2 - 1; gs1 = breakinfo->textrow[i - 1].gap; } if (i == breakinfo->n - 1) { g2 = mg0 + 1; gs2 = mg + 1; } else { g2 = breakinfo->textrow[i + 1].r1 - textrow->r2 - 1; gs2 = breakinfo->textrow[i].gap; } #if (WILLUSDEBUGX & 2) printf(" rowheight[%d] = %d, mh=%d, gs1=%d, gs2=%d\n",i,trh,gs1,gs2); #endif gap_is_big = (trh >= mh || (gs1 >= mg && gs2 >= mg)); /* ** Is the row width small and centered? If so, it should probably ** be attached to its nearest neighbor--it's usually a fragment of ** an equation or a table/figure. */ row_width_inches = (double) (textrow->c2 - textrow->c1 + 1) / src_dpi; m1 = fabs(textrow->c1 - c1) / nc; m2 = fabs(textrow->c2 - c2) / nc; row_too_small = m1 > 0.1 && m2 > 0.1 && row_width_inches < little_piece_threshold_inches && (g1 <= mg1 || g2 <= mg1); #if (WILLUSDEBUGX & 2) printf(" m1=%g, m2=%g, rwi=%g, g1=%d, g2=%d, mg0=%d\n",m1,m2,row_width_inches,g1,g2,mg0); #endif if (gap_is_big && !row_too_small) continue; #if (WILLUSDEBUGX & 2) printf(" row[%d] to be combined w/next row.\n",i); #endif if (row_too_small) { if (g1 < g2) i--; } else { if (gs1 < gs2) i--; } /* printf("Removing row. nrows=%d, rh=%d, gs1=%d, gs2=%d\n",breakinfo->n,trh,gs1,gs2); printf(" mh = %d, mg = %d\n",rh[breakinfo->n/2],gap[(breakinfo->n-1)/2]); */ breakinfo->textrow[i].r2 = breakinfo->textrow[i + 1].r2; if (breakinfo->textrow[i + 1].c2 > breakinfo->textrow[i].c2) breakinfo->textrow[i].c2 = breakinfo->textrow[i + 1].c2; if (breakinfo->textrow[i + 1].c1 < breakinfo->textrow[i].c1) breakinfo->textrow[i].c1 = breakinfo->textrow[i + 1].c1; /* Re-compute rowbase, capheight, lcheight */ { BMPREGION newregion; newregion = (*region); newregion.c1 = breakinfo->textrow[i].c1; newregion.c2 = breakinfo->textrow[i].c2; newregion.r1 = breakinfo->textrow[i].r1; newregion.r2 = breakinfo->textrow[i].r2; bmpregion_trim_margins(&newregion, colcount, rowcount, 0x1f); newregion.c1 = breakinfo->textrow[i].c1; newregion.c2 = breakinfo->textrow[i].c2; newregion.r1 = breakinfo->textrow[i].r1; newregion.r2 = breakinfo->textrow[i].r2; textrow_assign_bmpregion(&breakinfo->textrow[i], &newregion); } for (j = i + 1; j < breakinfo->n - 1; j++) breakinfo->textrow[j] = breakinfo->textrow[j + 1]; breakinfo->n--; i--; } willus_dmem_free(16, (double **) &rh, funcname); } static void breakinfo_alloc(int index, BREAKINFO *breakinfo, int nrows) { static char *funcname = "breakinfo_alloc"; willus_dmem_alloc_warn(index, (void **) &breakinfo->textrow, sizeof(TEXTROW) * (nrows / 2 + 2), funcname, 10); } static void breakinfo_free(int index, BREAKINFO *breakinfo) { static char *funcname = "breakinfo_free"; willus_dmem_free(index, (double **) &breakinfo->textrow, funcname); } static void breakinfo_sort_by_gap(BREAKINFO *breakinfo) { int n, top, n1; TEXTROW *x, x0; x = breakinfo->textrow; n = breakinfo->n; if (n < 2) return; top = n / 2; n1 = n - 1; while (1) { if (top > 0) { top--; x0 = x[top]; } else { x0 = x[n1]; x[n1] = x[0]; n1--; if (!n1) { x[0] = x0; return; } } { int parent, child; parent = top; child = top * 2 + 1; while (child <= n1) { if (child < n1 && x[child].gap < x[child + 1].gap) child++; if (x0.gap < x[child].gap) { x[parent] = x[child]; parent = child; child += (parent + 1); } else break; } x[parent] = x0; } } } static void breakinfo_sort_by_row_position(BREAKINFO *breakinfo) { int n, top, n1; TEXTROW *x, x0; x = breakinfo->textrow; n = breakinfo->n; if (n < 2) return; top = n / 2; n1 = n - 1; while (1) { if (top > 0) { top--; x0 = x[top]; } else { x0 = x[n1]; x[n1] = x[0]; n1--; if (!n1) { x[0] = x0; return; } } { int parent, child; parent = top; child = top * 2 + 1; while (child <= n1) { if (child < n1 && x[child].r1 < x[child + 1].r1) child++; if (x0.r1 < x[child].r1) { x[parent] = x[child]; parent = child; child += (parent + 1); } else break; } x[parent] = x0; } } } /* ** Add a vertically-contiguous rectangular region to the destination bitmap. ** The rectangular region may be broken up horizontally (wrapped). */ static void bmpregion_one_row_find_breaks(BMPREGION *region, BREAKINFO *breakinfo, int *colcount, int *rowcount, int add_to_dbase) { int nc, i, mingap, col0, dr, thlow, thhigh; int *bp; BMPREGION *newregion, _newregion; static char *funcname = "bmpregion_one_row_find_breaks"; if (debug) printf("@bmpregion_one_row_find_breaks(%d,%d)-(%d,%d)\n", region->c1, region->r1, region->c2, region->r2); newregion = &_newregion; (*newregion) = (*region); bmpregion_trim_margins(newregion, colcount, rowcount, 0x1f); region->lcheight = newregion->lcheight; region->capheight = newregion->capheight; region->rowbase = newregion->rowbase; region->h5050 = newregion->h5050; nc = newregion->c2 - newregion->c1 + 1; breakinfo->n = 0; if (nc < 6) return; /* ** Look for "space-sized" gaps, i.e. gaps that would occur between words. ** Use this as pixel counting aperture. */ dr = newregion->lcheight; mingap = dr * word_spacing * 0.8; if (mingap < 2) mingap = 2; /* ** Find places where there are gaps (store in bp array) ** Could do this more intelligently--maybe calculate a histogram? */ willus_dmem_alloc_warn(18, (void **) &bp, sizeof(int) * nc, funcname, 10); for (i = 0; i < nc; i++) bp[i] = 0; if (src_left_to_right) { for (i = newregion->c1; i <= newregion->c2; i++) { int i1, i2, pt, sum, ii; i1 = i - mingap / 2; i2 = i1 + mingap - 1; if (i1 < newregion->c1) i1 = newregion->c1; if (i2 > newregion->c2) i2 = newregion->c2; pt = (int) ((i2 - i1 + 1) * gtw_in * src_dpi + .5); if (pt < 1) pt = 1; for (sum = 0, ii = i1; ii <= i2; ii++, sum += colcount[ii]) ; bp[i - newregion->c1] = 10 * sum / pt; } } else { for (i = newregion->c2; i >= newregion->c1; i--) { int i1, i2, pt, sum, ii; i1 = i - mingap / 2; i2 = i1 + mingap - 1; if (i1 < newregion->c1) i1 = newregion->c1; if (i2 > newregion->c2) i2 = newregion->c2; pt = (int) ((i2 - i1 + 1) * gtw_in * src_dpi + .5); if (pt < 1) pt = 1; for (sum = 0, ii = i1; ii <= i2; ii++, sum += colcount[ii]) ; bp[i - newregion->c1] = 10 * sum / pt; } } #if (WILLUSDEBUGX & 4) if (region->r1 > 3699 && region->r1<3750) { static int a=0; FILE *f; f=fopen("outbp.ep",a==0?"w":"a"); a++; fprintf(f,"/sa l \"(%d,%d)-(%d,%d) lch=%d\" 2\n",region->c1,region->r1,region->c2,region->r2,region->lcheight); for (i=0;ic1; col0 <= newregion->c2; col0++) { int copt, c0; BMPREGION xregion; xregion = (*newregion); xregion.c1 = col0; for (; col0 <= newregion->c2; col0++) if (bp[col0 - newregion->c1] >= thhigh) break; if (col0 > newregion->c2) break; for (col0++; col0 <= newregion->c2; col0++) if (bp[col0 - newregion->c1] < thlow) break; for (copt = c0 = col0; col0 <= newregion->c2 && col0 - c0 <= dr; col0++) { if (bp[col0 - newregion->c1] < bp[copt - newregion->c1]) copt = col0; if (bp[col0 - newregion->c1] > thhigh) break; } if (copt > newregion->c2) copt = newregion->c2; xregion.c2 = copt; if (xregion.c2 - xregion.c1 < 2) continue; bmpregion_trim_margins(&xregion, colcount, rowcount, 0x1f); textrow_assign_bmpregion(&breakinfo->textrow[breakinfo->n++], &xregion); col0 = copt; if (copt == newregion->c2) break; } breakinfo_compute_col_gaps(breakinfo, newregion->c2); willus_dmem_free(18, (double **) &bp, funcname); /* Remove small gaps */ { double median_gap; word_gaps_add(add_to_dbase ? breakinfo : NULL, region->lcheight, &median_gap); breakinfo_remove_small_col_gaps(breakinfo, region->lcheight, median_gap / 1.9); } } /* ** pi = preserve indentation */ static void bmpregion_one_row_wrap_and_add(BMPREGION *region, BREAKINFO *rowbreakinfo, int index, int i1, int i2, MASTERINFO *masterinfo, int justflags, int *colcount, int *rowcount, PAGEINFO *pageinfo, int line_spacing, int mean_row_gap, int rowbase, int marking_flags, int pi) { int nc, nr, i, i0, gappix; double aspect_ratio, region_height; BREAKINFO *colbreaks, _colbreaks; BMPREGION *newregion, _newregion; #if (WILLUSDEBUGX & 4) printf("@bmpregion_one_row_wrap_and_add, index=%d, i1=%d, i2=%d\n",index,i1,i2); #endif newregion = &_newregion; (*newregion) = (*region); bmpregion_trim_margins(newregion, colcount, rowcount, 0xf); nc = newregion->c2 - newregion->c1 + 1; nr = newregion->r2 - newregion->r1 + 1; if (nc < 6) return; aspect_ratio = (double) nr / nc; region_height = (double) nr / src_dpi; if (aspect_ratio > no_wrap_ar_limit && region_height > no_wrap_height_limit_inches) { newregion->r1 = region->r1; newregion->r2 = region->r2; #ifdef WILLUSDEBUG printf("wrapflush6\n"); #endif wrapbmp_flush(masterinfo, 0, pageinfo, 1); if (index > i1) dst_add_gap_src_pixels("Tall region", masterinfo, rowbreakinfo->textrow[index - 1].gap); bmpregion_add(newregion, rowbreakinfo, masterinfo, 0, 0xf, 0, -1.0, 0, 2, colcount, rowcount, pageinfo, 0xf, rowbreakinfo->textrow[index].r2 - rowbreakinfo->textrow[index].rowbase); if (index < i2) gap_override_internal = rowbreakinfo->textrow[index].gap; return; } colbreaks = &_colbreaks; colbreaks->textrow = NULL; breakinfo_alloc(106, colbreaks, newregion->c2 - newregion->c1 + 1); bmpregion_one_row_find_breaks(newregion, colbreaks, colcount, rowcount, 1); if (pi && colbreaks->n > 0) { if (src_left_to_right) colbreaks->textrow[0].c1 = region->c1; else colbreaks->textrow[colbreaks->n - 1].c2 = region->c2; } /* hs=0.; for (i=0;in;i++) hs += (colbreaks->textrow[i].r2-colbreaks->textrow[i].r1); hs /= colbreaks->n; */ /* ** Find appropriate letter height to use for word spacing */ { double median_gap; word_gaps_add(NULL, newregion->lcheight, &median_gap); gappix = (int) (median_gap * newregion->lcheight + .5); } #if (WILLUSDEBUGX & 4) printf("Before small gap removal, column breaks:\n"); breakinfo_echo(colbreaks); #endif #if (WILLUSDEBUGX & 4) printf("After small gap removal, column breaks:\n"); breakinfo_echo(colbreaks); #endif if (show_marked_source) for (i = 0; i < colbreaks->n; i++) { BMPREGION xregion; xregion = (*newregion); xregion.c1 = colbreaks->textrow[i].c1; xregion.c2 = colbreaks->textrow[i].c2; mark_source_page(&xregion, 2, marking_flags); } #if (WILLUSDEBUGX & 4) for (i=0;in;i++) printf(" colbreak[%d] = %d - %d\n",i,colbreaks->textrow[i].c1,colbreaks->textrow[i].c2); #endif /* Maybe skip gaps < 0.5*median_gap or collect gap/rowheight ratios and skip small gaps */ /* (Could be thrown off by full-justified articles where some lines have big gaps.) */ /* Need do call a separate function that removes these gaps. */ for (i0 = 0; i0 < colbreaks->n;) { int i1, i2, toolong, rw, remaining_width_pixels; BMPREGION reg; toolong = 0; /* Avoid compiler warning */ for (i = i0; i < colbreaks->n; i++) { int wordgap; wordgap = wrapbmp_ends_in_hyphen() ? 0 : gappix; i1 = src_left_to_right ? i0 : colbreaks->n - 1 - i; i2 = src_left_to_right ? i : colbreaks->n - 1 - i0; rw = (colbreaks->textrow[i2].c2 - colbreaks->textrow[i1].c1 + 1); remaining_width_pixels = wrapbmp_remaining(); toolong = (rw + wordgap > remaining_width_pixels); #if (WILLUSDEBUGX & 4) printf(" i1=%d, i2=%d, rw=%d, rw+gap=%d, remainder=%d, toolong=%d\n",i1,i2,rw,rw+wordgap,remaining_width_pixels,toolong); #endif /* ** If we're too long with just one word and there is already ** stuff on the queue, then flush it and re-evaluate. */ if (i == i0 && toolong && wrapbmp_width() > 0) { #ifdef WILLUSDEBUG printf("wrapflush8\n"); #endif wrapbmp_flush(masterinfo, 1, pageinfo, 0); i--; continue; } /* ** If we're not too long and we're not done yet, add another word. */ if (i < colbreaks->n - 1 && !toolong) continue; /* ** Add the regions from i0 to i (or i0 to i-1) */ break; } if (i > i0 && toolong) i--; i1 = src_left_to_right ? i0 : colbreaks->n - 1 - i; i2 = src_left_to_right ? i : colbreaks->n - 1 - i0; reg = (*newregion); reg.c1 = colbreaks->textrow[i1].c1; reg.c2 = colbreaks->textrow[i2].c2; #if (WILLUSDEBUGX & 4) printf(" Adding i1=%d to i2=%d\n",i1,i2); #endif /* Trim the word top/bottom */ bmpregion_trim_margins(®, colcount, rowcount, 0xc); reg.c1 = colbreaks->textrow[i1].c1; reg.c2 = colbreaks->textrow[i2].c2; reg.lcheight = newregion->lcheight; reg.capheight = newregion->capheight; reg.rowbase = newregion->rowbase; reg.h5050 = newregion->h5050; if (reg.r1 > reg.rowbase) reg.r1 = reg.rowbase; if (reg.r2 < reg.rowbase) reg.r2 = reg.rowbase; /* Add it to the existing line queue */ wrapbmp_add(®, gappix, line_spacing, rowbase, mean_row_gap, justflags); if (toolong) { #ifdef WILLUSDEBUG printf("wrapflush7\n"); #endif wrapbmp_flush(masterinfo, 1, pageinfo, 0); } i0 = i + 1; } breakinfo_free(106, colbreaks); } static WILLUSBITMAP _wrapbmp, *wrapbmp; static int wrapbmp_base; static int wrapbmp_line_spacing; static int wrapbmp_gap; static int wrapbmp_bgcolor; static int wrapbmp_just; static int wrapbmp_rhmax; static int wrapbmp_thmax; static int wrapbmp_maxgap = 2; static int wrapbmp_height_extended; static HYPHENINFO wrapbmp_hyphen; void wrapbmp_init(void) { wrapbmp = &_wrapbmp; bmp_init(wrapbmp); wrapbmp_set_color(dst_color); wrapbmp->width = 0; wrapbmp->height = 0; wrapbmp_base = 0; wrapbmp_line_spacing = -1; wrapbmp_gap = -1; wrapbmp_bgcolor = -1; wrapbmp_height_extended = 0; wrapbmp_just = 0x8f; wrapbmp_rhmax = -1; wrapbmp_thmax = -1; wrapbmp_hyphen.ch = -1; just_flushed_internal = 0; beginning_gap_internal = -1; last_h5050_internal = -1; } static int wrapbmp_ends_in_hyphen(void) { return (wrapbmp_hyphen.ch >= 0); } static void wrapbmp_set_color(int is_color) { if (is_color) wrapbmp->bpp = 24; else { int i; wrapbmp->bpp = 8; for (i = 0; i < 256; i++) wrapbmp->red[i] = wrapbmp->blue[i] = wrapbmp->green[i] = i; } } static void wrapbmp_free(void) { bmp_free(wrapbmp); } static void wrapbmp_set_maxgap(int value) { wrapbmp_maxgap = value; } static int wrapbmp_width(void) { return (wrapbmp->width); } static int wrapbmp_remaining(void) { int maxpix, w; maxpix = max_region_width_inches * src_dpi; /* Don't include hyphen if wrapbmp ends in a hyphen */ if (wrapbmp_hyphen.ch < 0) w = wrapbmp->width; else if (src_left_to_right) w = wrapbmp_hyphen.c2 + 1; else w = wrapbmp->width - wrapbmp_hyphen.c2; return (maxpix - w); } /* ** region = bitmap region to add to line ** gap = horizontal pixel gap between existing region and region being added ** line_spacing = desired spacing between lines of text (pixels) ** rbase = position of baseline in region ** gio = gap if over--gap above top of text if it goes over line_spacing. */ // static int bcount=0; static void wrapbmp_add(BMPREGION *region, int gap, int line_spacing, int rbase, int gio, int just_flags) { WILLUSBITMAP *tmp, _tmp; int i, rh, th, bw, new_base, h2, bpp, width0; // static char filename[256]; #ifdef WILLUSDEBUG printf("@wrapbmp_add %d x %d (w=%d).\n",region->c2-region->c1+1,region->r2-region->r1+1,wrapbmp->width); #endif bmpregion_hyphen_detect(region); /* Figure out if what we're adding ends in a hyphen */ if (wrapbmp_ends_in_hyphen()) gap = 0; wrapbmp_hyphen_erase(); just_flushed_internal = 0; // Reset "just flushed" flag beginning_gap_internal = -1; // Reset top-of-page or top-of-column gap last_h5050_internal = -1; // Reset last row font size if (line_spacing > wrapbmp_line_spacing) wrapbmp_line_spacing = line_spacing; if (gio > wrapbmp_gap) wrapbmp_gap = gio; wrapbmp_bgcolor = region->bgcolor; wrapbmp_just = just_flags; /* printf(" c1=%d, c2=%d, r1=%d, r2=%d\n",region->c1,region->c2,region->r1,region->r2); printf(" gap=%d, line_spacing=%d, rbase=%d, gio=%d\n",gap,line_spacing,rbase,gio); */ bpp = dst_color ? 3 : 1; rh = rbase - region->r1 + 1; if (rh > wrapbmp_rhmax) wrapbmp_rhmax = rh; th = rh + (region->r2 - rbase); if (th > wrapbmp_thmax) wrapbmp_thmax = th; /* { WILLUSBITMAP *bmp,_bmp; bmp=&_bmp; bmp_init(bmp); bmp->height=region->r2-region->r1+1; bmp->width=region->c2-region->c1+1; bmp->bpp=bpp*8; if (bpp==1) for (i=0;i<256;i++) bmp->red[i]=bmp->blue[i]=bmp->green[i]=i; bmp_alloc(bmp); bw=bmp_bytewidth(bmp); memset(bmp_rowptr_from_top(bmp,0),255,bw*bmp->height); for (i=region->r1;i<=region->r2;i++) { unsigned char *d,*s; d=bmp_rowptr_from_top(bmp,i-region->r1); s=bmp_rowptr_from_top(dst_color?region->bmp:region->bmp8,i)+bpp*region->c1; if (i==rbase) memset(d,0,bw); else memcpy(d,s,bw); } sprintf(filename,"out%05d.png",bcount++); bmp_write(bmp,filename,stdout,100); bmp_free(bmp); } */ if (wrapbmp->width == 0) { /* Put appropriate gap in */ if (last_rowbase_internal >= 0 && rh < wrapbmp_line_spacing - last_rowbase_internal) { rh = wrapbmp_line_spacing - last_rowbase_internal; if (rh < 2) rh = 2; th = rh + (region->r2 - rbase); wrapbmp_height_extended = 0; } else wrapbmp_height_extended = (last_rowbase_internal >= 0); wrapbmp_base = rh - 1; wrapbmp->height = th; #ifdef WILLUSDEBUG printf("@wrapbmp_add: bmpheight set to %d (wls=%d, lrbi=%d)\n",wrapbmp->height,wrapbmp_line_spacing,last_rowbase_internal); #endif wrapbmp->width = region->c2 - region->c1 + 1; bmp_alloc(wrapbmp); bw = bmp_bytewidth(wrapbmp); memset(bmp_rowptr_from_top(wrapbmp, 0), 255, bw * wrapbmp->height); for (i = region->r1; i <= region->r2; i++) { unsigned char *d, *s; d = bmp_rowptr_from_top(wrapbmp, wrapbmp_base + (i - rbase)); s = bmp_rowptr_from_top(dst_color ? region->bmp : region->bmp8, i) + bpp * region->c1; memcpy(d, s, bw); } #ifdef WILLUSDEBUG if (wrapbmp->height<=wrapbmp_base) { printf("1. SCREEECH!\n"); printf("wrapbmp = %d x %d, base=%d\n",wrapbmp->width,wrapbmp->height,wrapbmp_base); exit(10); } #endif /* Copy hyphen info from added region */ wrapbmp_hyphen = region->hyphen; if (wrapbmp_ends_in_hyphen()) { wrapbmp_hyphen.r1 += (wrapbmp_base - rbase); wrapbmp_hyphen.r2 += (wrapbmp_base - rbase); wrapbmp_hyphen.ch -= region->c1; wrapbmp_hyphen.c2 -= region->c1; } return; } width0 = wrapbmp->width; /* Starting wrapbmp width */ tmp = &_tmp; bmp_init(tmp); bmp_copy(tmp, wrapbmp); tmp->width += gap + region->c2 - region->c1 + 1; if (rh > wrapbmp_base) { wrapbmp_height_extended = 1; new_base = rh - 1; } else new_base = wrapbmp_base; if (region->r2 - rbase > wrapbmp->height - 1 - wrapbmp_base) h2 = region->r2 - rbase; else h2 = wrapbmp->height - 1 - wrapbmp_base; tmp->height = new_base + h2 + 1; bmp_alloc(tmp); bw = bmp_bytewidth(tmp); memset(bmp_rowptr_from_top(tmp, 0), 255, bw * tmp->height); bw = bmp_bytewidth(wrapbmp); /* printf("3. wbh=%d x %d, tmp=%d x %d x %d, new_base=%d, wbbase=%d\n",wrapbmp->width,wrapbmp->height,tmp->width,tmp->height,tmp->bpp,new_base,wrapbmp_base); */ for (i = 0; i < wrapbmp->height; i++) { unsigned char *d, *s; d = bmp_rowptr_from_top(tmp, i + new_base - wrapbmp_base) + (src_left_to_right ? 0 : tmp->width - 1 - wrapbmp->width) * bpp; s = bmp_rowptr_from_top(wrapbmp, i); memcpy(d, s, bw); } bw = bpp * (region->c2 - region->c1 + 1); if (region->r1 + new_base - rbase < 0 || region->r2 + new_base - rbase > tmp->height - 1) { aprintf(ANSI_YELLOW "INTERNAL ERROR--TMP NOT DIMENSIONED PROPERLY.\n"); aprintf("(%d-%d), tmp->height=%d\n" ANSI_NORMAL, region->r1 + new_base - rbase, region->r2 + new_base - rbase, tmp->height); exit(10); } for (i = region->r1; i <= region->r2; i++) { unsigned char *d, *s; d = bmp_rowptr_from_top(tmp, i + new_base - rbase) + (src_left_to_right ? wrapbmp->width + gap : 0) * bpp; s = bmp_rowptr_from_top(dst_color ? region->bmp : region->bmp8, i) + bpp * region->c1; memcpy(d, s, bw); } bmp_copy(wrapbmp, tmp); bmp_free(tmp); /* Copy region's hyphen info */ wrapbmp_hyphen = region->hyphen; if (wrapbmp_ends_in_hyphen()) { wrapbmp_hyphen.r1 += (new_base - rbase); wrapbmp_hyphen.r2 += (new_base - rbase); if (src_left_to_right) { wrapbmp_hyphen.ch += width0 + gap - region->c1; wrapbmp_hyphen.c2 += width0 + gap - region->c1; } else { wrapbmp_hyphen.ch -= region->c1; wrapbmp_hyphen.c2 -= region->c1; } } wrapbmp_base = new_base; #ifdef WILLUSDEBUG if (wrapbmp->height<=wrapbmp_base) { printf("2. SCREEECH!\n"); printf("wrapbmp = %d x %d, base=%d\n",wrapbmp->width,wrapbmp->height,wrapbmp_base); exit(10); } #endif } static void wrapbmp_flush(MASTERINFO *masterinfo, int allow_full_justification, PAGEINFO *pageinfo, int use_bgi) { BMPREGION region; WILLUSBITMAP *bmp8, _bmp8; int gap, just, nomss, dh; int *colcount, *rowcount; static char *funcname = "wrapbmp_flush"; // char filename[256]; if (wrapbmp->width <= 0) { if (use_bgi == 1 && beginning_gap_internal > 0) dst_add_gap_src_pixels("wrapbmp_bgi0", masterinfo, beginning_gap_internal); beginning_gap_internal = -1; last_h5050_internal = -1; if (use_bgi) just_flushed_internal = 1; return; } #ifdef WILLUSDEBUG printf("@wrapbmp_flush()\n"); #endif /* { char filename[256]; int i; static int bcount=0; for (i=0;iheight;i++) { unsigned char *p; int j; p=bmp_rowptr_from_top(wrapbmp,i); for (j=0;jwidth;j++) if (p[j]>240) p[j]=192; } sprintf(filename,"out%05d.png",bcount++); bmp_write(wrapbmp,filename,stdout,100); } */ colcount = rowcount = NULL; willus_dmem_alloc_warn(19, (void **) &colcount, (wrapbmp->width + 16) * sizeof(int), funcname, 10); willus_dmem_alloc_warn(20, (void **) &rowcount, (wrapbmp->height + 16) * sizeof(int), funcname, 10); region.c1 = 0; region.c2 = wrapbmp->width - 1; region.r1 = 0; region.r2 = wrapbmp->height - 1; region.rowbase = wrapbmp_base; region.bmp = wrapbmp; region.bgcolor = wrapbmp_bgcolor; #ifdef WILLUSDEBUG printf("Bitmap is %d x %d (baseline=%d)\n",wrapbmp->width,wrapbmp->height,wrapbmp_base); #endif /* Sanity check on row spacing -- don't let it be too large. */ nomss = wrapbmp_rhmax * 1.7; /* Nominal single-spaced height for this row */ if (last_rowbase_internal < 0) dh = 0; else { dh = (int) (wrapbmp_line_spacing - last_rowbase_internal - 1.2 * fabs(vertical_line_spacing) * nomss + .5); if (vertical_line_spacing < 0.) { int dh1; if (wrapbmp_maxgap > 0) dh1 = region.rowbase + 1 - wrapbmp_rhmax - wrapbmp_maxgap; else dh1 = (int) (wrapbmp_line_spacing - last_rowbase_internal - 1.2 * nomss + .5); if (dh1 > dh) dh = dh1; } } if (dh > 0) { #ifdef WILLUSDEBUG aprintf(ANSI_YELLOW "dh > 0 = %d" ANSI_NORMAL "\n",dh); printf(" wrapbmp_line_spacing=%d\n",wrapbmp_line_spacing); printf(" nomss = %d\n",nomss); printf(" vls = %g\n",vertical_line_spacing); printf(" lrbi=%d\n",last_rowbase_internal); printf(" wrapbmp_maxgap=%d\n",wrapbmp_maxgap); printf(" wrapbmp_rhmax=%d\n",wrapbmp_rhmax); #endif region.r1 = dh; /* if (dh>200) { bmp_write(wrapbmp,"out.png",stdout,100); exit(10); } */ } if (wrapbmp->bpp == 24) { bmp8 = &_bmp8; bmp_init(bmp8); bmp_convert_to_greyscale_ex(bmp8, wrapbmp); region.bmp8 = bmp8; } else region.bmp8 = wrapbmp; if (gap_override_internal > 0) { region.r1 = wrapbmp_base - wrapbmp_rhmax + 1; if (region.r1 < 0) region.r1 = 0; if (region.r1 > wrapbmp_base) region.r1 = wrapbmp_base; gap = gap_override_internal; gap_override_internal = -1; } else { if (wrapbmp_height_extended) gap = wrapbmp_gap; else gap = 0; } #ifdef WILLUSDEBUG printf("wf: gap=%d\n",gap); #endif if (gap > 0) dst_add_gap_src_pixels("wrapbmp", masterinfo, gap); if (!allow_full_justification) just = (wrapbmp_just & 0xcf) | 0x20; else just = wrapbmp_just; bmpregion_add(®ion, NULL, masterinfo, 0, 0, 0, -1.0, just, 2, colcount, rowcount, pageinfo, 0xf, wrapbmp->height - 1 - wrapbmp_base); if (wrapbmp->bpp == 24) bmp_free(bmp8); willus_dmem_free(20, (double **) &rowcount, funcname); willus_dmem_free(19, (double **) &colcount, funcname); wrapbmp->width = 0; wrapbmp->height = 0; wrapbmp_line_spacing = -1; wrapbmp_gap = -1; wrapbmp_rhmax = -1; wrapbmp_thmax = -1; wrapbmp_hyphen.ch = -1; if (use_bgi == 1 && beginning_gap_internal > 0) dst_add_gap_src_pixels("wrapbmp_bgi1", masterinfo, beginning_gap_internal); beginning_gap_internal = -1; last_h5050_internal = -1; if (use_bgi) just_flushed_internal = 1; } static void wrapbmp_hyphen_erase(void) { WILLUSBITMAP *bmp, _bmp; int bw, bpp, c0, c1, c2, i; if (wrapbmp_hyphen.ch < 0) return; #if (WILLUSDEBUGX & 16) printf("@hyphen_erase, bmp=%d x %d x %d\n",wrapbmp->width,wrapbmp->height,wrapbmp->bpp); printf(" ch=%d, c2=%d, r1=%d, r2=%d\n",wrapbmp_hyphen.ch,wrapbmp_hyphen.c2,wrapbmp_hyphen.r1,wrapbmp_hyphen.r2); #endif bmp = &_bmp; bmp_init(bmp); bmp->bpp = wrapbmp->bpp; if (bmp->bpp == 8) for (i = 0; i < 256; i++) bmp->red[i] = bmp->blue[i] = bmp->green[i] = i; bmp->height = wrapbmp->height; if (src_left_to_right) { bmp->width = wrapbmp_hyphen.c2 + 1; c0 = 0; c1 = wrapbmp_hyphen.ch; c2 = bmp->width - 1; } else { bmp->width = wrapbmp->width - wrapbmp_hyphen.c2; c0 = wrapbmp_hyphen.c2; c1 = 0; c2 = wrapbmp_hyphen.ch - wrapbmp_hyphen.c2; } bmp_alloc(bmp); bpp = bmp->bpp == 24 ? 3 : 1; bw = bpp * bmp->width; for (i = 0; i < bmp->height; i++) memcpy(bmp_rowptr_from_top(bmp, i), bmp_rowptr_from_top(wrapbmp, i) + bpp * c0, bw); bw = (c2 - c1 + 1) * bpp; if (bw > 0) for (i = wrapbmp_hyphen.r1; i <= wrapbmp_hyphen.r2; i++) memset(bmp_rowptr_from_top(bmp, i) + bpp * c1, 255, bw); #if (WILLUSDEBUGX & 16) { static int count=1; char filename[256]; sprintf(filename,"be%04d.png",count); bmp_write(wrapbmp,filename,stdout,100); sprintf(filename,"ae%04d.png",count); bmp_write(bmp,filename,stdout,100); count++; } #endif bmp_copy(wrapbmp, bmp); bmp_free(bmp); } /* ** src is only allocated if dst_color != 0 */ static void white_margins(WILLUSBITMAP *src, WILLUSBITMAP *srcgrey) { int i, n; BMPREGION *region, _region; region = &_region; region->bmp = srcgrey; get_white_margins(region); n = region->c1; for (i = 0; i < srcgrey->height; i++) { unsigned char *p; if (dst_color) { p = bmp_rowptr_from_top(src, i); memset(p, 255, n * 3); } p = bmp_rowptr_from_top(srcgrey, i); memset(p, 255, n); } n = srcgrey->width - 1 - region->c2; for (i = 0; i < srcgrey->height; i++) { unsigned char *p; if (dst_color) { p = bmp_rowptr_from_top(src, i) + 3 * (src->width - n); memset(p, 255, n * 3); } p = bmp_rowptr_from_top(srcgrey, i) + srcgrey->width - n; memset(p, 255, n); } n = region->r1; for (i = 0; i < n; i++) { unsigned char *p; if (dst_color) { p = bmp_rowptr_from_top(src, i); memset(p, 255, src->width * 3); } p = bmp_rowptr_from_top(srcgrey, i); memset(p, 255, srcgrey->width); } n = srcgrey->height - 1 - region->r2; for (i = srcgrey->height - n; i < srcgrey->height; i++) { unsigned char *p; if (dst_color) { p = bmp_rowptr_from_top(src, i); memset(p, 255, src->width * 3); } p = bmp_rowptr_from_top(srcgrey, i); memset(p, 255, srcgrey->width); } } static void get_white_margins(BMPREGION *region) { int n; double defval; defval = 0.25; if (mar_left < 0.) mar_left = defval; n = (int) (0.5 + mar_left * src_dpi); if (n > region->bmp->width) n = region->bmp->width; region->c1 = n; if (mar_right < 0.) mar_right = defval; n = (int) (0.5 + mar_right * src_dpi); if (n > region->bmp->width) n = region->bmp->width; region->c2 = region->bmp->width - 1 - n; if (mar_top < 0.) mar_top = defval; n = (int) (0.5 + mar_top * src_dpi); if (n > region->bmp->height) n = region->bmp->height; region->r1 = n; if (mar_bot < 0.) mar_bot = defval; n = (int) (0.5 + mar_bot * src_dpi); if (n > region->bmp->height) n = region->bmp->height; region->r2 = region->bmp->height - 1 - n; } /* ** bitmap_orientation() ** ** 1.0 means neutral ** ** >> 1.0 means document is likely portrait (no rotation necessary) ** (max is 100.) ** ** << 1.0 means document is likely landscape (need to rotate it) ** (min is 0.01) ** */ static double bitmap_orientation(WILLUSBITMAP *bmp) { int i, ic, wtcalc; double hsum, vsum, rat; wtcalc = -1; for (vsum = 0., hsum = 0., ic = 0, i = 20; i <= 85; i += 5, ic++) { double nv, nh; int wth, wtv; #ifdef DEBUG printf("h %d:\n",i); #endif if (ic == 0) wth = -1; else wth = wtcalc; wth = -1; nh = bmp_inflections_horizontal(bmp, 8, i, &wth); #ifdef DEBUG { FILE *f; f=fopen("inf.ep","a"); fprintf(f,"/ag\n"); fclose(f); } printf("v %d:\n",i); #endif if (ic == 0) wtv = -1; else wtv = wtcalc; wtv = -1; nv = bmp_inflections_vertical(bmp, 8, i, &wtv); if (ic == 0) { if (wtv > wth) wtcalc = wtv; else wtcalc = wth; continue; } // exit(10); hsum += nh * i * i * i; vsum += nv * i * i * i; } if (vsum == 0. && hsum == 0.) rat = 1.0; else if (hsum < vsum && hsum / vsum < .01) rat = 100.; else rat = vsum / hsum; if (rat < .01) rat = .01; // printf(" page %2d: %8.4f\n",pagenum,rat); // fprintf(out,"\t%8.4f",vsum/hsum); // fprintf(out,"\n"); return (rat); } static double bmp_inflections_vertical(WILLUSBITMAP *srcgrey, int ndivisions, int delta, int *wthresh) { int y0, y1, ny, i, nw, nisum, ni, wt, wtmax; double *g; char *funcname = "bmp_inflections_vertical"; nw = srcgrey->width / ndivisions; y0 = srcgrey->height / 6; y1 = srcgrey->height - y0; ny = y1 - y0; willus_dmem_alloc_warn(21, (void **) &g, ny * sizeof(double), funcname, 10); wtmax = -1; for (nisum = 0, i = 0; i < 10; i++) { int x0, x1, nx, j; x0 = (srcgrey->width - nw) * (i + 2) / 13; x1 = x0 + nw; if (x1 > srcgrey->width) x1 = srcgrey->width; nx = x1 - x0; for (j = y0; j < y1; j++) { int k, rsum; unsigned char *p; p = bmp_rowptr_from_top(srcgrey, j) + x0; for (rsum = k = 0; k < nx; k++, p++) rsum += p[0]; g[j - y0] = (double) rsum / nx; } wt = (*wthresh); ni = inflection_count(g, ny, delta, &wt); if ((*wthresh) < 0 && ni >= 3 && wt > wtmax) wtmax = wt; if (ni > nisum) nisum = ni; } willus_dmem_free(21, &g, funcname); if ((*wthresh) < 0) (*wthresh) = wtmax; return (nisum); } static double bmp_inflections_horizontal(WILLUSBITMAP *srcgrey, int ndivisions, int delta, int *wthresh) { int x0, x1, nx, bw, i, nh, nisum, ni, wt, wtmax; double *g; char *funcname = "bmp_inflections_vertical"; nh = srcgrey->height / ndivisions; x0 = srcgrey->width / 6; x1 = srcgrey->width - x0; nx = x1 - x0; bw = bmp_bytewidth(srcgrey); willus_dmem_alloc_warn(22, (void **) &g, nx * sizeof(double), funcname, 10); wtmax = -1; for (nisum = 0, i = 0; i < 10; i++) { int y0, y1, ny, j; y0 = (srcgrey->height - nh) * (i + 2) / 13; y1 = y0 + nh; if (y1 > srcgrey->height) y1 = srcgrey->height; ny = y1 - y0; for (j = x0; j < x1; j++) { int k, rsum; unsigned char *p; p = bmp_rowptr_from_top(srcgrey, y0) + j; for (rsum = k = 0; k < ny; k++, p += bw) rsum += p[0]; g[j - x0] = (double) rsum / ny; } wt = (*wthresh); ni = inflection_count(g, nx, delta, &wt); if ((*wthresh) < 0 && ni >= 3 && wt > wtmax) wtmax = wt; if (ni > nisum) nisum = ni; } willus_dmem_free(22, &g, funcname); if ((*wthresh) < 0) (*wthresh) = wtmax; return (nisum); } static int inflection_count(double *x, int n, int delta, int *wthresh) { int i, i0, ni, ww, c, ct, wt, mode; double meandi, meandisq, f1, f2, stdev; double *xs; static int hist[256]; static char *funcname = "inflection_count"; /* Find threshold white value that peaks must exceed */ if ((*wthresh) < 0) { for (i = 0; i < 256; i++) hist[i] = 0; for (i = 0; i < n; i++) { i0 = floor(x[i]); if (i0 > 255) i0 = 255; hist[i0]++; } ct = n * .15; for (c = 0, i = 255; i >= 0; i--) { c += hist[i]; if (c > ct) break; } wt = i - 10; if (wt < 192) wt = 192; #ifdef DEBUG printf("wt=%d\n",wt); #endif (*wthresh) = wt; } else wt = (*wthresh); ww = n / 150; if (ww < 1) ww = 1; willus_dmem_alloc_warn(23, (void **) &xs, sizeof(double) * n, funcname, 10); for (i = 0; i < n - ww; i++) { int j; for (xs[i] = 0., j = 0; j < ww; j++, xs[i] += x[i + j]) ; xs[i] /= ww; } meandi = meandisq = 0.; if (xs[0] <= wt - delta) mode = 1; else if (xs[0] >= wt) mode = -1; else mode = 0; for (i0 = 0, ni = 0, i = 1; i < n - ww; i++) { if (mode == 1 && xs[i] >= wt) { if (i0 > 0) { meandi += i - i0; meandisq += (i - i0) * (i - i0); ni++; } i0 = i; mode = -1; continue; } if (xs[i] <= wt - delta) mode = 1; } stdev = 1.0; /* Avoid compiler warning */ if (ni > 0) { meandi /= ni; meandisq /= ni; stdev = sqrt(fabs(meandi * meandi - meandisq)); } f1 = meandi / n; if (f1 > .15) f1 = .15; if (ni > 2) { if (stdev / meandi < .05) f2 = 20.; else f2 = meandi / stdev; } else f2 = 1.; #ifdef DEBUG printf(" ni=%3d, f1=%8.4f, f2=%8.4f, f1*f2*ni=%8.4f\n",ni,f1,f2,f1*f2*ni); { static int count=0; FILE *f; int i; f=fopen("inf.ep",count==0?"w":"a"); count++; fprintf(f,"/sa l \"%d\" 1\n",ni); for (i=0;in = boxes->na = 0; boxes->box = NULL; } static void pdfboxes_free(PDFBOXES *boxes) { static char *funcname = "pdfboxes_free"; willus_dmem_free(24, (double **) &boxes->box, funcname); } #ifdef COMMENT static void pdfboxes_add_box(PDFBOXES *boxes,PDFBOX *box) { static char *funcname="pdfboxes_add_box"; if (boxes->n>=boxes->na) { int newsize; newsize = boxes->na < 1024 ? 2048 : boxes->na*2; /* Just calls willus_mem_alloc if oldsize==0 */ willus_mem_realloc_robust_warn((void **)&boxes->box,newsize*sizeof(PDFBOX), boxes->na*sizeof(PDFBOX),funcname,10); boxes->na=newsize; } boxes->box[boxes->n++]=(*box); } static void pdfboxes_delete(PDFBOXES *boxes,int n) { if (n>0 && nn) { int i; for (i=0;in-n;i++) boxes->box[i]=boxes->box[i+n]; } boxes->n -= n; if (boxes->n < 0) boxes->n = 0; } #endif /* ** Track gaps between words so that we can tell when one is out of family. ** lcheight = height of a lowercase letter. */ static void word_gaps_add(BREAKINFO *breakinfo, int lcheight, double *median_gap) { static int nn = 0; static double gap[1024]; static char *funcname = "word_gaps_add"; if (breakinfo != NULL && breakinfo->n > 1) { int i; for (i = 0; i < breakinfo->n - 1; i++) { double g; g = (double) breakinfo->textrow[i].gap / lcheight; if (g >= word_spacing) { gap[nn & 0x3ff] = g; nn++; } } } if (median_gap != NULL) { if (nn > 0) { int n; static double *gap_sorted; n = (nn > 1024) ? 1024 : nn; willus_dmem_alloc_warn(28, (void **) &gap_sorted, sizeof(double) * n, funcname, 10); memcpy(gap_sorted, gap, n * sizeof(double)); sortd(gap_sorted, n); (*median_gap) = gap_sorted[n / 2]; willus_dmem_free(28, &gap_sorted, funcname); } else (*median_gap) = 0.7; } } /* ** bmp must be grayscale! (cbmp = color, can be null) */ static void bmp_detect_vertical_lines(WILLUSBITMAP *bmp, WILLUSBITMAP *cbmp, double dpi, double minwidth_in, double maxwidth_in, double minheight_in, double anglemax_deg, int white_thresh) { int tc, iangle, irow, icol; int rowstep, na, angle_sign, ccthresh; int pixmin, halfwidth, bytewidth; int bs1, nrsteps, dp; double anglestep; WILLUSBITMAP *tmp, _tmp; unsigned char *p0; if (debug) printf("At bmp_detect_vertical_lines...\n"); if (!bmp_is_grayscale(bmp)) { printf( "Internal error. bmp_detect_vertical_lines passed a non-grayscale bitmap.\n"); exit(10); } tmp = &_tmp; bmp_init(tmp); bmp_copy(tmp, bmp); dp = bmp_rowptr_from_top(tmp, 0) - bmp_rowptr_from_top(bmp, 0); bytewidth = bmp_bytewidth(bmp); pixmin = (int) (minwidth_in * dpi + .5); if (pixmin < 1) pixmin = 1; halfwidth = pixmin / 4; if (halfwidth < 1) halfwidth = 1; anglestep = atan2((double) halfwidth / dpi, minheight_in); na = (int) ((anglemax_deg * PI / 180.) / anglestep + .5); if (na < 1) na = 1; rowstep = (int) (dpi / 40. + .5); if (rowstep < 2) rowstep = 2; nrsteps = bmp->height / rowstep; bs1 = bytewidth * rowstep; ccthresh = (int) (minheight_in * dpi / rowstep + .5); if (ccthresh < 2) ccthresh = 2; if (debug && verbose) printf( " na = %d, rowstep = %d, ccthresh = %d, white_thresh = %d, nrsteps=%d\n", na, rowstep, ccthresh, white_thresh, nrsteps); /* bmp_write(bmp,"out.png",stdout,97); wfile_written_info("out.png",stdout); */ p0 = bmp_rowptr_from_top(bmp, 0); for (tc = 0; tc < 100; tc++) { int ccmax, ic0max, ir0max; double tanthmax; ccmax = -1; ic0max = ir0max = 0; tanthmax = 0.; for (iangle = 0; iangle <= na; iangle++) { for (angle_sign = 1; angle_sign >= -1; angle_sign -= 2) { double th, tanth, tanthx; int ic1, ic2; if (iangle == 0 && angle_sign == -1) continue; th = (PI / 180.) * iangle * angle_sign * fabs(anglemax_deg) / na; tanth = tan(th); tanthx = tanth * rowstep; if (angle_sign == 1) { ic1 = -(int) (bmp->height * tanth + 1.); ic2 = bmp->width - 1; } else { ic1 = (int) (-bmp->height * tanth + 1.); ic2 = bmp->width - 1 + (int) (-bmp->height * tanth + 1.); } // printf("iangle=%2d, angle_sign=%2d, ic1=%4d, ic2=%4d\n",iangle,angle_sign,ic1,ic2); for (icol = ic1; icol <= ic2; icol++) { unsigned char *p; int cc, ic0, ir0; p = p0; if (icol < 0 || icol > bmp->width - 1) for (irow = 0; irow < nrsteps; irow++, p += bs1) { int ic; ic = icol + irow * tanthx; if (ic >= 0 && ic < bmp->width) break; } else irow = 0; for (ir0 = ic0 = cc = 0; irow < nrsteps; irow++, p += bs1) { int ic; ic = icol + irow * tanthx; if (ic < 0 || ic >= bmp->width) break; if ((p[ic] < white_thresh || p[ic + bytewidth] < white_thresh) && (p[ic + dp] < white_thresh || p[ic + bytewidth + dp] < white_thresh)) { if (cc == 0) { ic0 = ic; ir0 = irow * rowstep; } cc++; if (cc > ccmax) { ccmax = cc; tanthmax = tanth; ic0max = ic0; ir0max = ir0; } } else cc = 0; } } } } if (ccmax < ccthresh) break; if (debug) printf( " Vert line detected: ccmax=%d (pix=%d), tanthmax=%g, ic0max=%d, ir0max=%d\n", ccmax, ccmax * rowstep, tanthmax, ic0max, ir0max); if (!vert_line_erase(bmp, cbmp, tmp, ir0max, ic0max, tanthmax, minheight_in, minwidth_in, maxwidth_in, white_thresh)) break; } /* bmp_write(tmp,"outt.png",stdout,95); wfile_written_info("outt.png",stdout); bmp_write(bmp,"out2.png",stdout,95); wfile_written_info("out2.png",stdout); exit(10); */ } /* ** Calculate max vert line length. Line is terminated by nw consecutive white pixels ** on either side. */ static int vert_line_erase(WILLUSBITMAP *bmp, WILLUSBITMAP *cbmp, WILLUSBITMAP *tmp, int row0, int col0, double tanth, double minheight_in, double minwidth_in, double maxwidth_in, int white_thresh) { int lw, cc, maxdev, nw, dir, i, n; int *c1, *c2, *w; static char *funcname = "vert_line_erase"; willus_dmem_alloc_warn(26, (void **) &c1, sizeof(int) * 3 * bmp->height, funcname, 10); c2 = &c1[bmp->height]; w = &c2[bmp->height]; /* maxdev = (int)((double)bmp->height / minheight_in +.5); if (maxdev < 3) maxdev=3; */ nw = (int) ((double) src_dpi / 100. + .5); if (nw < 2) nw = 2; maxdev = nw; for (i = 0; i < bmp->height; i++) c1[i] = c2[i] = -1; n = 0; for (dir = -1; dir <= 1; dir += 2) { int del, brc; brc = 0; for (del = (dir == -1) ? 0 : 1; 1; del++) { int r, c; unsigned char *p; r = row0 + dir * del; if (r < 0 || r > bmp->height - 1) break; c = col0 + (r - row0) * tanth; if (c < 0 || c > bmp->width - 1) break; p = bmp_rowptr_from_top(bmp, r); for (i = c; i <= c + maxdev && i < bmp->width; i++) if (p[i] < white_thresh) break; if (i > c + maxdev || i >= bmp->width) { for (i = c - 1; i >= c - maxdev && i >= 0; i--) if (p[i] < white_thresh) break; if (i < c - maxdev || i < 0) { brc++; if (brc >= nw) break; continue; } } brc = 0; for (c = i, cc = 0; i < bmp->width; i++) if (p[i] < white_thresh) cc = 0; else { cc++; if (cc >= nw) break; } c2[r] = i - cc; if (c2[r] > bmp->width - 1) c2[r] = bmp->width - 1; for (cc = 0, i = c; i >= 0; i--) if (p[i] < white_thresh) cc = 0; else { cc++; if (cc >= nw) break; } c1[r] = i + cc; if (c1[r] < 0) c1[r] = 0; w[n++] = c2[r] - c1[r] + 1; c1[r] -= cc; if (c1[r] < 0) c1[r] = 0; c2[r] += cc; if (c2[r] > bmp->width - 1) c2[r] = bmp->width - 1; } } if (n > 1) sorti(w, n); if (n < 10 || n < minheight_in * src_dpi || w[n / 4] < minwidth_in * src_dpi || w[3 * n / 4] > maxwidth_in * src_dpi || (erase_vertical_lines == 1 && w[n - 1] > maxwidth_in * src_dpi)) { /* Erase area in temp bitmap */ for (i = 0; i < bmp->height; i++) { unsigned char *p; int cmax; if (c1[i] < 0 || c2[i] < 0) continue; cmax = (c2[i] - c1[i]) + 1; p = bmp_rowptr_from_top(tmp, i) + c1[i]; for (; cmax > 0; cmax--, p++) (*p) = 255; } } else { /* Erase line width in source bitmap */ lw = w[3 * n / 4] + nw * 2; if (lw > maxwidth_in * src_dpi / 2) lw = maxwidth_in * src_dpi / 2; for (i = 0; i < bmp->height; i++) { unsigned char *p; int c0, cmin, cmax, count, white; if (c1[i] < 0 || c2[i] < 0) continue; c0 = col0 + (i - row0) * tanth; cmin = c0 - lw - 1; if (cmin < c1[i]) cmin = c1[i]; cmax = c0 + lw + 1; if (cmax > c2[i]) cmax = c2[i]; p = bmp_rowptr_from_top(bmp, i); c0 = (p[cmin] > p[cmax]) ? cmin : cmax; white = p[c0]; if (white <= white_thresh) white = white_thresh + 1; if (white > 255) white = 255; count = (cmax - cmin) + 1; p = &p[cmin]; for (; count > 0; count--, p++) (*p) = white; if (cbmp != NULL) { unsigned char *p0; p = bmp_rowptr_from_top(cbmp, i); p0 = p + c0 * 3; p = p + cmin * 3; count = (cmax - cmin) + 1; for (; count > 0; count--, p += 3) { p[0] = p0[0]; p[1] = p0[1]; p[2] = p0[2]; } } } } willus_dmem_free(26, (double **) &c1, funcname); return (1); } /* ** mem_index... controls which memory allocactions get a protective margin ** around them. */ static int mem_index_min = 999; static int mem_index_max = 999; static void willus_dmem_alloc_warn(int index, void **ptr, int size, char *funcname, int exitcode) { if (index >= mem_index_min && index <= mem_index_max) { char *ptr1; void *x; willus_mem_alloc_warn((void **) &ptr1, size + 2048, funcname, exitcode); ptr1 += 1024; x = (void *) ptr1; (*ptr) = x; } else willus_mem_alloc_warn(ptr, size, funcname, exitcode); } static void willus_dmem_free(int index, double **ptr, char *funcname) { if ((*ptr) == NULL) return; if (index >= mem_index_min && index <= mem_index_max) { double *x; char *ptr1; x = (*ptr); ptr1 = (char *) x; ptr1 -= 1024; x = (double *) ptr1; willus_mem_free(&x, funcname); (*ptr) = NULL; } else willus_mem_free(ptr, funcname); } /* mem.c */ /* ** The reason I don't simply use malloc is because I want to allocate ** memory using type long instead of type size_t. On some compilers, ** like gcc, these are the same, so it doesn't matter. On other ** compilers, like Turbo C, these are different. ** */ static int willus_mem_alloc(double **ptr,long size,char *name) { #if (defined(WIN32) && !defined(__DMC__)) unsigned long memsize; memsize = (unsigned long)size; #ifdef USEGLOBAL (*ptr) = (memsize==size) ? (double *)GlobalAlloc(GPTR,memsize) : NULL; #else (*ptr) = (memsize==size) ? (double *)CoTaskMemAlloc(memsize) : NULL; #endif #else size_t memsize; memsize=(size_t)size; (*ptr) = (memsize==size) ? (double *)malloc(memsize) : NULL; #endif /* { f=fopen("mem.dat","a"); fprintf(f,"willus_mem_alloc(%d,%s)\n",size,name); fclose(f); } */ return((*ptr)!=NULL); } /* ** Prints an integer to 's' with commas separating every three digits. ** E.g. 45,399,350 ** Correctly handles negative values. */ static void comma_print(char *s,long size) { int i,m,neg; char tbuf[80]; if (!size) { s[0]='0'; s[1]='\0'; return; } s[0]='\0'; neg=0; if (size<0) { size=-size; neg=1; } for (i=0,m=size%1000;size;i++,size=(size-m)/1000,m=size%1000) { sprintf(tbuf,m==size ? "%d%s":"%03d%s",m,i>0 ? "," : ""); strcat(tbuf,s); strcpy(s,tbuf); } if (neg) { strcpy(tbuf,"-"); strcat(tbuf,s); strcpy(s,tbuf); } } static void mem_warn(char *name,int size,int exitcode) { static char buf[128]; aprintf("\n" ANSI_RED "\aCannot allocate enough memory for " "function %s." ANSI_NORMAL "\n",name); comma_print(buf,size); aprintf(" " ANSI_RED "(Needed %s bytes.)" ANSI_NORMAL "\n\n",buf); if (exitcode!=0) { aprintf(" " ANSI_RED "Program terminated." ANSI_NORMAL "\n\n"); exit(exitcode); } } static int willus_mem_alloc_warn(void **ptr, int size, char *name, int exitcode) { int status; status = willus_mem_alloc((double **) ptr, (long) size, name); if (!status) mem_warn(name, size, exitcode); return (status); } static void willus_mem_free(double **ptr, char *name) { if ((*ptr) != NULL) { #if (defined(WIN32) && !defined(__DMC__)) #ifdef USEGLOBAL GlobalFree((void *)(*ptr)); #else CoTaskMemFree((void *)(*ptr)); #endif #else free((void *) (*ptr)); #endif (*ptr) = NULL; } } static int willus_mem_realloc_robust(double **ptr,long newsize,long oldsize,char *name) { #if (defined(WIN32) && !defined(__DMC__)) unsigned long memsize; void *newptr; #else size_t memsize; void *newptr; #endif #if (defined(WIN32) && !defined(__DMC__)) memsize=(unsigned long)newsize; #else memsize=(size_t)newsize; #endif if (memsize!=newsize) return(0); if ((*ptr)==NULL || oldsize<=0) return(willus_mem_alloc(ptr,newsize,name)); #if (defined(WIN32) && !defined(__DMC__)) #ifdef USEGLOBAL newptr = (void *)GlobalReAlloc((void *)(*ptr),memsize,GMEM_MOVEABLE); #else newptr = (void *)CoTaskMemRealloc((void *)(*ptr),memsize); #endif #else newptr = realloc((void *)(*ptr),memsize); #endif if (newptr==NULL && willus_mem_alloc((double **)&newptr,newsize,name)) { memcpy(newptr,(*ptr),oldsize); willus_mem_free(ptr,name); } if (newptr==NULL) return(0); (*ptr) = newptr; return(1); } static int willus_mem_realloc_robust_warn(void **ptr,int newsize,int oldsize,char *name, int exitcode) { int status; status = willus_mem_realloc_robust((double **)ptr,newsize,oldsize,name); if (!status) mem_warn(name,newsize,exitcode); return(status); } /* math.c */ static void sortd(double *x, int n) { int top, n1; double x0; if (n < 2) return; top = n / 2; n1 = n - 1; while (1) { if (top > 0) { top--; x0 = x[top]; } else { x0 = x[n1]; x[n1] = x[0]; n1--; if (!n1) { x[0] = x0; return; } } { int parent, child; parent = top; child = top * 2 + 1; while (child <= n1) { if (child < n1 && x[child] < x[child + 1]) child++; if (x0 < x[child]) { x[parent] = x[child]; parent = child; child += (parent + 1); } else break; } x[parent] = x0; } } } static void sorti(int *x, int n) { int top, n1; int x0; if (n < 2) return; top = n / 2; n1 = n - 1; while (1) { if (top > 0) { top--; x0 = x[top]; } else { x0 = x[n1]; x[n1] = x[0]; n1--; if (!n1) { x[0] = x0; return; } } { int parent, child; parent = top; child = top * 2 + 1; while (child <= n1) { if (child < n1 && x[child] < x[child + 1]) child++; if (x0 < x[child]) { x[parent] = x[child]; parent = child; child += (parent + 1); } else break; } x[parent] = x0; } } } /* bmp.c */ /* ** Should call bmp_set_type() right after this to set the bitmap type. */ #define RGBSET24(bmp,ptr,r,g,b) \ if (bmp->type==WILLUSBITMAP_TYPE_NATIVE) \ { \ ptr[0]=r; \ ptr[1]=g; \ ptr[2]=b; \ } \ else \ { \ ptr[2]=r; \ ptr[1]=g; \ ptr[0]=b; \ } #define RGBGET(bmp,ptr,r,g,b) \ if (bmp->bpp==8) \ { \ r=bmp->red[ptr[0]]; \ g=bmp->green[ptr[0]]; \ b=bmp->blue[ptr[0]]; \ } \ else if (bmp->type==WILLUSBITMAP_TYPE_NATIVE) \ { \ r=ptr[0]; \ g=ptr[1]; \ b=ptr[2]; \ } \ else \ { \ r=ptr[2]; \ g=ptr[1]; \ b=ptr[0]; \ } #define RGBGETINCPTR(bmp,ptr,r,g,b) \ if (bmp->bpp==8) \ { \ r=bmp->red[ptr[0]]; \ g=bmp->green[ptr[0]]; \ b=bmp->blue[ptr[0]]; \ ptr++; \ } \ else if (bmp->type==WILLUSBITMAP_TYPE_NATIVE) \ { \ r=ptr[0]; \ g=ptr[1]; \ b=ptr[2]; \ ptr+=3; \ } \ else \ { \ r=ptr[2]; \ g=ptr[1]; \ b=ptr[0]; \ ptr+=3; \ } static void bmp_init(WILLUSBITMAP *bmap) { bmap->data = NULL; bmap->size_allocated = 0; bmap->type = WILLUSBITMAP_TYPE_NATIVE; } static int bmp_bytewidth_win32(WILLUSBITMAP *bmp) { return(((bmp->bpp==24 ? bmp->width*3 : bmp->width)+3)&(~0x3)); } /* ** The width, height, and bpp parameters of the WILLUSBITMAP structure ** should be set before calling this function. */ static int bmp_alloc(WILLUSBITMAP *bmap) { int size; static char *funcname = "bmp_alloc"; if (bmap->bpp != 8 && bmap->bpp != 24) { printf("Internal error: call to bmp_alloc has bpp!=8 and bpp!=24!\n"); exit(10); } /* Choose the max size even if not WIN32 to avoid memory faults */ /* and to allow the possibility of changing the "type" of the */ /* bitmap without reallocating memory. */ size = bmp_bytewidth_win32(bmap) * bmap->height; if (bmap->data != NULL && bmap->size_allocated >= size) return (1); if (bmap->data != NULL) willus_mem_realloc_robust_warn((void **) &bmap->data, size, bmap->size_allocated, funcname, 10); else willus_mem_alloc_warn((void **) &bmap->data, size, funcname, 10); bmap->size_allocated = size; return (1); } static void bmp_free(WILLUSBITMAP *bmap) { if (bmap->data!=NULL) { willus_mem_free((double **)&bmap->data,"bmp_free"); bmap->data=NULL; bmap->size_allocated=0; } } /* ** If 8-bit, the bitmap is filled with . ** If 24-bit, it gets , , values. */ static void bmp_fill(WILLUSBITMAP *bmp,int r,int g,int b) { int y,n; if (bmp->bpp==8 || (r==g && r==b)) { memset(bmp->data,r,bmp->size_allocated); return; } if (bmp->type==WILLUSBITMAP_TYPE_WIN32 && bmp->bpp==24) { y=r; r=b; b=y; } for (y=bmp->height-1;y>=0;y--) { unsigned char *p; p=bmp_rowptr_from_top(bmp,y); for (n=bmp->width-1;n>=0;n--) { (*p)=r; p++; (*p)=g; p++; (*p)=b; p++; } } } static int bmp_copy(WILLUSBITMAP *dest, WILLUSBITMAP *src) { dest->width = src->width; dest->height = src->height; dest->bpp = src->bpp; dest->type = src->type; if (!bmp_alloc(dest)) return (0); memcpy(dest->data, src->data, src->height * bmp_bytewidth(src)); memcpy(dest->red, src->red, sizeof(int) * 256); memcpy(dest->green, src->green, sizeof(int) * 256); memcpy(dest->blue, src->blue, sizeof(int) * 256); return (1); } static int bmp_bytewidth(WILLUSBITMAP *bmp) { return (bmp->bpp == 24 ? bmp->width * 3 : bmp->width); } /* ** row==0 ==> top row of bitmap ** row==bmp->height-1 ==> bottom row of bitmap ** (regardless of bitmap type) */ static unsigned char *bmp_rowptr_from_top(WILLUSBITMAP *bmp, int row) { if (bmp->type == WILLUSBITMAP_TYPE_WIN32) return (&bmp->data[bmp_bytewidth(bmp) * (bmp->height - 1 - row)]); else return (&bmp->data[bmp_bytewidth(bmp) * row]); } /* ** Allocate more bitmap rows. ** ratio typically something like 1.5 or 2.0 */ static void bmp_more_rows(WILLUSBITMAP *bmp, double ratio, int pixval) { int new_height, new_bytes, bw; static char *funcname = "bmp_more_rows"; new_height = (int) (bmp->height * ratio + .5); if (new_height <= bmp->height) return; bw = bmp_bytewidth(bmp); new_bytes = bw * new_height; if (new_bytes > bmp->size_allocated) { willus_mem_realloc_robust_warn((void **) &bmp->data, new_bytes, bmp->size_allocated, funcname, 10); bmp->size_allocated = new_bytes; } /* Fill in */ memset(bmp_rowptr_from_top(bmp, bmp->height), pixval, (new_height - bmp->height) * bw); bmp->height = new_height; } static double resample_single(double *y,double x1,double x2) { int i,i1,i2; double dx,dx1,dx2,sum; i1=floor(x1); i2=floor(x2); if (i1==i2) return(y[i1]); dx=x2-x1; if (dx>1.) dx=1.; dx1= 1.-(x1-i1); dx2= x2-i2; sum=0.; if (dx1 > 1e-8*dx) sum += dx1*y[i1]; if (dx2 > 1e-8*dx) sum += dx2*y[i2]; for (i=i1+1;i<=i2-1;sum+=y[i],i++); return(sum/(x2-x1)); } /* ** Resample src[] into dst[]. ** Examples: resample_1d(dst,src,0.,5.,5) would simply copy the ** first five elements of src[] to dst[]. ** ** resample_1d(dst,src,0.,5.,10) would work as follows: ** dst[0] and dst[1] would get src[0]. ** dst[2] and dst[3] would get src[1]. ** and so on. ** */ static void resample_1d(double *dst,double *src,double x1,double x2, int n) { int i; double new,last; last=x1; for (i=0;itype==WILLUSBITMAP_TYPE_WIN32 && color>=0) color=2-color; for (row=0;rowbpp==8) { switch (color) { case -1: for (col=0,p+=x0;colred[p[0]]; break; case 1: for (col=0,p+=x0;colgreen[p[0]]; break; case 2: for (col=0,p+=x0;colblue[p[0]]; break; } } else { p+=color; for (col=0,p+=3*x0;colwidth (x-coord), and top to bottom go from ** 0.0 to src->height (y-coord). ** The cropped rectangle (x1,y1) to (x2,y2) is placed into ** the destination bitmap, which need not be allocated yet. ** ** The destination bitmap will be 8-bit grayscale if the source bitmap ** passes the bmp_is_grayscale() function. Otherwise it will be 24-bit. ** ** Returns 0 for okay. ** -1 for not enough memory. ** -2 for bad cropping area or destination bitmap size */ static int bmp_resample(WILLUSBITMAP *dest, WILLUSBITMAP *src, double x1, double y1, double x2, double y2, int newwidth, int newheight) { int gray, maxlen, colorplanes; double t; double *tempbmp; double *temprow; int color, hmax, row, col, dy; static char *funcname = "bmp_resample"; /* Clip and sort x1,y1 and x2,y2 */ if (x1 > src->width) x1 = src->width; else if (x1 < 0.) x1 = 0.; if (x2 > src->width) x2 = src->width; else if (x2 < 0.) x2 = 0.; if (y1 > src->height) y1 = src->height; else if (y1 < 0.) y1 = 0.; if (y2 > src->height) y2 = src->height; else if (y2 < 0.) y2 = 0.; if (x2 < x1) { t = x2; x2 = x1; x1 = t; } if (y2 < y1) { t = y2; y2 = y1; y1 = t; } dy = y2 - y1; dy += 2; if (x2 - x1 == 0. || y2 - y1 == 0.) return (-2); /* Allocate temp storage */ maxlen = x2 - x1 > dy + newheight ? (int) (x2 - x1) : dy + newheight; maxlen += 16; hmax = newheight > dy ? newheight : dy; if (!willus_mem_alloc(&temprow, maxlen * sizeof(double), funcname)) return (-1); if (!willus_mem_alloc(&tempbmp, hmax * newwidth * sizeof(double), funcname)) { willus_mem_free(&temprow, funcname); return (-1); } if ((gray = bmp_is_grayscale(src)) != 0) { int i; dest->bpp = 8; for (i = 0; i < 256; i++) dest->red[i] = dest->blue[i] = dest->green[i] = i; } else dest->bpp = 24; dest->width = newwidth; dest->height = newheight; dest->type = WILLUSBITMAP_TYPE_NATIVE; if (!bmp_alloc(dest)) { willus_mem_free(&tempbmp, funcname); willus_mem_free(&temprow, funcname); return (-1); } colorplanes = gray ? 1 : 3; for (color = 0; color < colorplanes; color++) { bmp_resample_1(tempbmp, src, x1, y1, x2, y2, newwidth, newheight, temprow, gray ? -1 : color); for (row = 0; row < newheight; row++) { unsigned char *p; double *s; p = bmp_rowptr_from_top(dest, row) + color; s = &tempbmp[row * newwidth]; if (colorplanes == 1) for (col = 0; col < newwidth; p[0] = (int) (s[0] + .5), col++, s++, p++) ; else for (col = 0; col < newwidth; p[0] = (int) (s[0] + .5), col++, s++, p += colorplanes) ; } } willus_mem_free(&tempbmp, funcname); willus_mem_free(&temprow, funcname); return (0); } static int bmp8_greylevel_convert(int r,int g,int b) { return((int)((r*0.3+g*0.59+b*0.11)*1.002)); } /* ** One of dest or src can be NULL, which is the ** same as setting them equal to each other, but ** in this case, the bitmap must be 24-bit! */ static int bmp_is_grayscale(WILLUSBITMAP *bmp) { int i; if (bmp->bpp!=8) return(0); for (i=0;i<256;i++) if (bmp->red[i]!=i || bmp->green[i]!=i || bmp->blue[i]!=i) return(0); return(1); } static void bmp_color_xform8(WILLUSBITMAP *dest,WILLUSBITMAP *src,unsigned char *newval) { int i,ir; if (src==NULL) src=dest; if (dest==NULL) dest=src; if (dest!=src) { dest->width = src->width; dest->height = src->height; dest->bpp = 8; for (i=0;i<256;i++) dest->red[i]=dest->green[i]=dest->blue[i]=i; bmp_alloc(dest); } for (ir=0;irheight;ir++) { unsigned char *sp,*dp; sp=bmp_rowptr_from_top(src,ir); dp=bmp_rowptr_from_top(dest,ir); for (i=0;iwidth;i++) dp[i]=newval[sp[i]]; } } /* ** One of dest or src can be NULL, which is the ** same as setting them equal to each other, but ** in this case, the bitmap must be 24-bit! */ static void bmp_color_xform(WILLUSBITMAP *dest,WILLUSBITMAP *src,unsigned char *newval) { int ir,ic; if (src==NULL) src=dest; if (dest==NULL) dest=src; if (bmp_is_grayscale(src)) { bmp_color_xform8(dest,src,newval); return; } if (dest!=src) { dest->width = src->width; dest->height = src->height; dest->bpp = 24; bmp_alloc(dest); } for (ir=0;irheight;ir++) { unsigned char *sp,*dp; sp=bmp_rowptr_from_top(src,ir); dp=bmp_rowptr_from_top(dest,ir); for (ic=0;icwidth;ic++,dp+=3) { int r,g,b; RGBGETINCPTR(src,sp,r,g,b); r=newval[r]; g=newval[g]; b=newval[b]; RGBSET24(dest,dp,r,g,b); } } } /* ** One of dest or src can be NULL, which is the ** same as setting them equal to each other, but ** in this case, the bitmap must be 24-bit! ** Note: contrast > 1 will increase the contrast. ** contrast < 1 will decrease the contrast. ** contrast of 0 will make all pixels the same value. ** contrast of 1 will not change the image. */ static void bmp_contrast_adjust(WILLUSBITMAP *dest,WILLUSBITMAP *src,double contrast) { int i; static unsigned char newval[256]; for (i=0;i<256;i++) { double x,y; int sgn,v; x=(i-127.5)/127.5; sgn = x<0 ? -1 : 1; if (contrast<0) sgn = -sgn; x=fabs(x); if (fabs(contrast)>1.5) y=x<.99999 ? 1-exp(fabs(contrast)*x/(x-1)) : 1.; else { y=fabs(contrast)*x; if (y>1.) y=1.; } y = 127.5+y*sgn*127.5; v = (int)(y+.5); if (v<0) v=0; if (v>255) v=255; newval[i] = v; } bmp_color_xform(dest,src,newval); } /* ** Convert bitmap to grey-scale in-situ */ static void bmp_convert_to_greyscale_ex(WILLUSBITMAP *dst, WILLUSBITMAP *src) { int oldbpr, newbpr, bpp, dp, rownum, colnum, i; oldbpr = bmp_bytewidth(src); dp = src->bpp == 8 ? 1 : 3; bpp = src->bpp; dst->bpp = 8; for (i = 0; i < 256; i++) dst->red[i] = dst->green[i] = dst->blue[i] = i; if (dst != src) { dst->width = src->width; dst->height = src->height; bmp_alloc(dst); } newbpr = bmp_bytewidth(dst); /* Possibly restore src->bpp to 24 so RGBGET works right (src & dst may be the same) */ src->bpp = bpp; for (rownum = 0; rownum < src->height; rownum++) { unsigned char *oldp, *newp; oldp = &src->data[oldbpr * rownum]; newp = &dst->data[newbpr * rownum]; for (colnum = 0; colnum < src->width; colnum++, oldp += dp, newp++) { int r, g, b; RGBGET(src, oldp, r, g, b); (*newp) = bmp8_greylevel_convert(r, g, b); } } dst->bpp = 8; /* Possibly restore dst->bpp to 8 since src & dst may be the same. */ } /* bmpmupdf.c */ static int bmpmupdf_pixmap_to_bmp(WILLUSBITMAP *bmp, fz_context *ctx, fz_pixmap *pixmap) { unsigned char *p; int ncomp, i, row, col; bmp->width = fz_pixmap_width(ctx, pixmap); bmp->height = fz_pixmap_height(ctx, pixmap); ncomp = fz_pixmap_components(ctx, pixmap); /* Has to be 8-bit or RGB */ if (ncomp != 2 && ncomp != 4) return (-1); bmp->bpp = (ncomp == 2) ? 8 : 24; bmp_alloc(bmp); if (ncomp == 2) for (i = 0; i < 256; i++) bmp->red[i] = bmp->green[i] = bmp->blue[i] = i; p = fz_pixmap_samples(ctx, pixmap); if (ncomp == 1) for (row = 0; row < bmp->height; row++) { unsigned char *dest; dest = bmp_rowptr_from_top(bmp, row); memcpy(dest, p, bmp->width); p += bmp->width; } else if (ncomp == 2) for (row = 0; row < bmp->height; row++) { unsigned char *dest; dest = bmp_rowptr_from_top(bmp, row); for (col = 0; col < bmp->width; col++, dest++, p += 2) dest[0] = p[0]; } else for (row = 0; row < bmp->height; row++) { unsigned char *dest; dest = bmp_rowptr_from_top(bmp, row); for (col = 0; col < bmp->width; col++, dest += ncomp - 1, p += ncomp) memcpy(dest, p, ncomp - 1); } return (0); } static void handle(int wait, ddjvu_context_t *ctx) { const ddjvu_message_t *msg; if (!ctx) return; if (wait) msg = ddjvu_message_wait(ctx); while ((msg = ddjvu_message_peek(ctx))) { switch(msg->m_any.tag) { case DDJVU_ERROR: fprintf(stderr,"ddjvu: %s\n", msg->m_error.message); if (msg->m_error.filename) fprintf(stderr,"ddjvu: '%s:%d'\n", msg->m_error.filename, msg->m_error.lineno); exit(10); default: break; } } ddjvu_message_pop(ctx); }