2
0
mirror of https://github.com/koreader/koreader synced 2024-11-18 03:25:46 +00:00
koreader/k2pdfopt.c

6461 lines
183 KiB
C
Raw Normal View History

/*
** k2pdfopt.c K2pdfopt optimizes PDF/DJVU files for mobile e-readers
** (e.g. the Kindle) and smartphones. It works well on
** multi-column PDF/DJVU files. K2pdfopt is freeware.
**
** Copyright (C) 2012 http://willus.com
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU Affero General Public License as
** published by the Free Software Foundation, either version 3 of the
** License, or (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU Affero General Public License for more details.
**
** You should have received a copy of the GNU Affero General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
**
/*
** WILLUSDEBUGX flags:
** 1 = Generic
** 2 = breakinfo row analysis
** 4 = word wrapping
** 8 = word wrapping II
** 16 = hyphens
** 32 = OCR
**
*/
// #define WILLUSDEBUGX 32
// #define WILLUSDEBUG
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include "k2pdfopt.h"
#define HAVE_MUPDF
#define VERSION "v1.51"
#define GRAYLEVEL(r,g,b) ((int)(((r)*0.3+(g)*0.59+(b)*0.11)*1.002))
#if (defined(WIN32) || defined(WIN64))
#define TTEXT_BOLD ANSI_WHITE
#define TTEXT_NORMAL ANSI_NORMAL
#define TTEXT_BOLD2 ANSI_YELLOW
#define TTEXT_INPUT ANSI_GREEN
#define TTEXT_WARN ANSI_RED
#define TTEXT_HEADER ANSI_CYAN
#define TTEXT_MAGENTA ANSI_MAGENTA
#else
#define TTEXT_BOLD "\x1b[0m\x1b[34m"
#define TTEXT_NORMAL "\x1b[0m"
#define TTEXT_BOLD2 "\x1b[0m\x1b[33m"
#define TTEXT_INPUT "\x1b[0m\x1b[32m"
#define TTEXT_WARN "\x1b[0m\x1b[31m"
#define TTEXT_HEADER "\x1b[0m\x1b[36m"
#define TTEXT_MAGENTA "\x1b[0m\x1b[35m"
#endif
#ifndef __ANSI_H__
#define ANSI_RED "\x1b[1m\x1b[31m"
#define ANSI_GREEN "\x1b[1m\x1b[32m"
#define ANSI_YELLOW "\x1b[1m\x1b[33m"
#define ANSI_BROWN "\x1b[0m\x1b[33m"
#define ANSI_BLUE "\x1b[1m\x1b[34m"
#define ANSI_MAGENTA "\x1b[1m\x1b[35m"
#define ANSI_CYAN "\x1b[1m\x1b[36m"
#define ANSI_WHITE "\x1b[1m\x1b[37m"
#define ANSI_NORMAL "\x1b[0m\x1b[37m"
#define ANSI_SAVE_CURSOR "\x1b[s"
#define ANSI_RESTORE_CURSOR "\x1b[u"
#define ANSI_CLEAR_TO_END "\x1b[K"
#define ANSI_BEGIN_LINE "\x1b[80D"
#define ANSI_UP_ONE_LINE "\x1b[1A"
#define ANSI_HOME "\x1b[2J\x1b[0;0;H"
#define __ANSI_H__
#endif
/* bmp.c */
#define WILLUSBITMAP_TYPE_NATIVE 0
#define WILLUSBITMAP_TYPE_WIN32 1
#ifdef PI
#undef PI
#endif
/*
** Constants from the front of the CRC standard math tables
** (Accuracy = 50 digits)
*/
#define PI 3.14159265358979323846264338327950288419716939937511
#define SQRT2 1.41421356237309504880168872420969807856967187537695
#define SQRT3 1.73205080756887729352744634150587236694280525381039
#define LOG10E 0.43429448190325182765112891891660508229439700580367
#define DBPERNEP (20.*LOG10E)
#define SRC_TYPE_PDF 1
#define SRC_TYPE_DJVU 2
#define SRC_TYPE_OTHER 3
/* DATA STRUCTURES */
typedef struct {
int page; /* Source page */
double rot_deg; /* Source rotation (happens first) */
double x0, y0; /* x0,y0, in points, of lower left point on rectangle */
double w, h; /* width and height of rectangle in points */
double scale; /* Scale rectangle by this factor on destination page */
double x1, y1; /* (x,y) position of lower left point on destination page, in points */
} PDFBOX;
typedef struct {
PDFBOX *box;
int n;
int na;
} PDFBOXES;
typedef struct {
int pageno; /* Source page number */
double page_rot_deg; /* Source page rotation */
PDFBOXES boxes;
} PAGEINFO;
typedef struct {
int ch; /* Hyphen starting point -- < 0 for no hyphen */
int c2; /* End of end region if hyphen is erased */
int r1; /* Top of hyphen */
int r2; /* Bottom of hyphen */
} HYPHENINFO;
typedef struct {
int c1, c2; /* Left and right columns */
int r1, r2; /* Top and bottom of region in pixels */
int rowbase; /* Baseline of row */
int gap; /* Gap to next region in pixels */
int rowheight; /* text + gap */
int capheight;
int h5050;
int lcheight;
HYPHENINFO hyphen;
} TEXTROW;
typedef struct {
TEXTROW *textrow;
int rhmean_pixels; /* Mean row height (text) */
int centered; /* Is this set of rows centered? */
int n, na;
} BREAKINFO;
typedef struct {
int red[256];
int green[256];
int blue[256];
unsigned char *data; /* Top to bottom in native type, bottom to */
/* top in Win32 type. */
int width; /* Width of image in pixels */
int height; /* Height of image in pixels */
int bpp; /* Bits per pixel (only 8 or 24 allowed) */
int size_allocated;
int type; /* See defines above for WILLUSBITMAP_TYPE_... */
} WILLUSBITMAP;
typedef struct {
int r1, r2; /* row position from top of bmp, inclusive */
int c1, c2; /* column positions, inclusive */
int rowbase; /* Baseline of text row */
int capheight; /* capital letter height */
int h5050;
int lcheight; /* lower-case letter height */
int bgcolor; /* 0 - 255 */
HYPHENINFO hyphen;
WILLUSBITMAP *bmp;
WILLUSBITMAP *bmp8;
WILLUSBITMAP *marked;
} BMPREGION;
typedef struct {
WILLUSBITMAP bmp;
int rows;
int published_pages;
int bgcolor;
int fit_to_page;
int wordcount;
char debugfolder[256];
} MASTERINFO;
static int verbose = 0;
static int debug = 0;
#define DEFAULT_WIDTH 600
#define DEFAULT_HEIGHT 800
#define MIN_REGION_WIDTH_INCHES 1.0
#define SRCROT_AUTO -999.
#define SRCROT_AUTOEP -998.
/*
** Blank Area Threshold Widths--average black pixel width, in inches, that
** prevents a region from being determined as "blank" or clear.
*/
static double gtc_in = .005; // detecting gap between columns
static double gtr_in = .006; // detecting gap between rows
static double gtw_in = .0015; // detecting gap between words
// static double gtm_in=.005; // detecting margins for trimming
static int src_left_to_right = 1;
static int src_whitethresh = -1;
static int dst_dpi = 167;
static int fit_columns = 1;
static int src_dpi = 300;
static int dst_width = DEFAULT_WIDTH; /* Full device width in pixels */
static int dst_height = DEFAULT_HEIGHT;
static int dst_userwidth = DEFAULT_WIDTH;
static int dst_userheight = DEFAULT_HEIGHT;
static int dst_justify = -1; // 0 = left, 1 = center
static int dst_figure_justify = -1; // -1 = same as dst_justify. 0=left 1=center 2=right
static double dst_min_figure_height_in = 0.75;
static int dst_fulljustify = -1; // 0 = no, 1 = yes
static int dst_color = 0;
static int dst_landscape = 0;
static double dst_mar = 0.02;
static double dst_martop = -1.0;
static double dst_marbot = -1.0;
static double dst_marleft = -1.0;
static double dst_marright = -1.0;
static double min_column_gap_inches = 0.1;
static double max_column_gap_inches = 1.5; // max gap between columns
static double min_column_height_inches = 1.5;
static double mar_top = -1.0;
static double mar_bot = -1.0;
static double mar_left = -1.0;
static double mar_right = -1.0;
static double max_region_width_inches = 3.6; /* Max viewable width (device width minus margins) */
static int max_columns = 2;
static double column_gap_range = 0.33;
static double column_offset_max = 0.2;
static double column_row_gap_height_in = 1. / 72.;
static int text_wrap = 1;
static double word_spacing = 0.375;
static double display_width_inches = 3.6; /* Device width = dst_width / dst_dpi */
static int column_fitted = 0;
static double lm_org, bm_org, tm_org, rm_org, dpi_org;
static double contrast_max = 2.0;
static int show_marked_source = 0;
static double defect_size_pts = 1.0;
static double max_vertical_gap_inches = 0.25;
static double vertical_multiplier = 1.0;
static double vertical_line_spacing = -1.2;
static double vertical_break_threshold = 1.75;
static int erase_vertical_lines = 0;
static int k2_hyphen_detect = 1;
static int dst_fit_to_page = 0;
/*
** Undocumented cmd-line args
*/
static double no_wrap_ar_limit = 0.2; /* -arlim */
static double no_wrap_height_limit_inches = 0.55; /* -whmax */
static double little_piece_threshold_inches = 0.5; /* -rwmin */
/*
** Keeping track of vertical gaps
*/
static double last_scale_factor_internal = -1.0;
/* indicates desired vert. gap before next region is added. */
static int last_rowbase_internal; /* Pixels between last text row baseline and current end */
/* of destination bitmap. */
static int beginning_gap_internal = -1;
static int last_h5050_internal = -1;
static int just_flushed_internal = 0;
static int gap_override_internal; /* If > 0, apply this gap in wrapbmp_flush() and then reset. */
void adjust_params_init(void);
void set_region_widths(void);
static void mark_source_page(BMPREGION *region, int caller_id, int mark_flags);
static void fit_column_to_screen(double column_width_inches);
static void restore_output_dpi(void);
void adjust_contrast(WILLUSBITMAP *src, WILLUSBITMAP *srcgrey, int *white);
static int bmpregion_row_black_count(BMPREGION *region, int r0);
static void bmpregion_row_histogram(BMPREGION *region);
static int bmpregion_find_multicolumn_divider(BMPREGION *region,
int *row_black_count, BMPREGION *pageregion, int *npr, int *colcount,
int *rowcount);
static int bmpregion_column_height_and_gap_test(BMPREGION *column,
BMPREGION *region, int r1, int r2, int cmid, int *colcount,
int *rowcount);
static int bmpregion_is_clear(BMPREGION *region, int *row_is_clear,
double gt_in);
void bmpregion_multicolumn_add(BMPREGION *region, MASTERINFO *masterinfo,
int level, PAGEINFO *pageinfo, int colgap0_pixels);
static void bmpregion_vertically_break(BMPREGION *region,
MASTERINFO *masterinfo, int allow_text_wrapping, double force_scale,
int *colcount, int *rowcount, PAGEINFO *pageinfo, int colgap_pixels,
int ncols);
static void bmpregion_add(BMPREGION *region, BREAKINFO *breakinfo,
MASTERINFO *masterinfo, int allow_text_wrapping, int trim_flags,
int allow_vertical_breaks, double force_scale, int justify_flags,
int caller_id, int *colcount, int *rowcount, PAGEINFO *pageinfo,
int mark_flags, int rowbase_delta);
static void dst_add_gap_src_pixels(char *caller, MASTERINFO *masterinfo,
int pixels);
static void dst_add_gap(MASTERINFO *masterinfo, double inches);
static void bmp_src_to_dst(MASTERINFO *masterinfo, WILLUSBITMAP *src,
int justification_flags, int whitethresh, int nocr, int dpi);
static void bmp_fully_justify(WILLUSBITMAP *jbmp, WILLUSBITMAP *src, int nocr,
int whitethresh, int just);
#ifdef HAVE_OCR
static void ocrwords_fill_in(OCRWORDS *words,WILLUSBITMAP *src,int whitethresh,int dpi);
#endif
static void bmpregion_trim_margins(BMPREGION *region, int *colcount0,
int *rowcount0, int flags);
static void bmpregion_hyphen_detect(BMPREGION *region);
#if (WILLUSDEBUGX & 6)
static void breakinfo_echo(BREAKINFO *bi);
#endif
#if (defined(WILLUSDEBUGX) || defined(WILLUSDEBUG))
static void bmpregion_write(BMPREGION *region,char *filename);
#endif
static int height2_calc(int *rc, int n);
static void trim_to(int *count, int *i1, int i2, double gaplen);
static void bmpregion_analyze_justification_and_line_spacing(BMPREGION *region,
BREAKINFO *breakinfo, MASTERINFO *masterinfo, int *colcount,
int *rowcount, PAGEINFO *pageinfo, int allow_text_wrapping,
double force_scale);
static int bmpregion_is_centered(BMPREGION *region, BREAKINFO *breakinfo,
int i1, int i2, int *textheight);
static double median_val(double *x, int n);
static void bmpregion_find_vertical_breaks(BMPREGION *region,
BREAKINFO *breakinfo, int *colcount, int *rowcount, double apsize_in);
static void textrow_assign_bmpregion(TEXTROW *textrow, BMPREGION *region);
static void breakinfo_compute_row_gaps(BREAKINFO *breakinfo, int r2);
static void breakinfo_compute_col_gaps(BREAKINFO *breakinfo, int c2);
static void breakinfo_remove_small_col_gaps(BREAKINFO *breakinfo, int lcheight,
double mingap);
static void breakinfo_remove_small_rows(BREAKINFO *breakinfo, double fracrh,
double fracgap, BMPREGION *region, int *colcount, int *rowcount);
static void breakinfo_alloc(int index, BREAKINFO *breakinfo, int nrows);
static void breakinfo_free(int index, BREAKINFO *breakinfo);
static void breakinfo_sort_by_gap(BREAKINFO *breakinfo);
static void breakinfo_sort_by_row_position(BREAKINFO *breakinfo);
static void bmpregion_one_row_find_breaks(BMPREGION *region,
BREAKINFO *breakinfo, int *colcount, int *rowcount, int add_to_dbase);
void wrapbmp_init(void);
static int wrapbmp_ends_in_hyphen(void);
static void wrapbmp_set_color(int is_color);
static void wrapbmp_free(void);
static void wrapbmp_set_maxgap(int value);
static int wrapbmp_width(void);
static int wrapbmp_remaining(void);
static void wrapbmp_add(BMPREGION *region, int gap, int line_spacing, int rbase,
int gio, int justification_flags);
static void wrapbmp_flush(MASTERINFO *masterinfo, int allow_full_justify,
PAGEINFO *pageinfo, int use_bgi);
static void wrapbmp_hyphen_erase(void);
static void bmpregion_one_row_wrap_and_add(BMPREGION *region,
BREAKINFO *breakinfo, int index, int i0, int i1, MASTERINFO *masterinfo,
int justflags, int *colcount, int *rowcount, PAGEINFO *pageinfo,
int rheight, int mean_row_gap, int rowbase, int marking_flags, int pi);
static void white_margins(WILLUSBITMAP *src, WILLUSBITMAP *srcgrey);
static void get_white_margins(BMPREGION *region);
/* Bitmap orientation detection functions */
static double bitmap_orientation(WILLUSBITMAP *bmp);
static double bmp_inflections_vertical(WILLUSBITMAP *srcgrey, int ndivisions,
int delta, int *wthresh);
static double bmp_inflections_horizontal(WILLUSBITMAP *srcgrey, int ndivisions,
int delta, int *wthresh);
static int inflection_count(double *x, int n, int delta, int *wthresh);
static void pdfboxes_init(PDFBOXES *boxes);
static void pdfboxes_free(PDFBOXES *boxes);
/*
static void pdfboxes_add_box(PDFBOXES *boxes,PDFBOX *box);
static void pdfboxes_delete(PDFBOXES *boxes,int n);
*/
static void word_gaps_add(BREAKINFO *breakinfo, int lcheight,
double *median_gap);
static void bmp_detect_vertical_lines(WILLUSBITMAP *bmp, WILLUSBITMAP *cbmp,
double dpi, double minwidth_in, double maxwidth_in, double minheight_in,
double anglemax_deg, int white_thresh);
static int vert_line_erase(WILLUSBITMAP *bmp, WILLUSBITMAP *cbmp,
WILLUSBITMAP *tmp, int row0, int col0, double tanthx,
double minheight_in, double minwidth_in, double maxwidth_in,
int white_thresh);
static void willus_dmem_alloc_warn(int index, void **ptr, int size,
char *funcname, int exitcode);
static void willus_dmem_free(int index, double **ptr, char *funcname);
static int willus_mem_alloc_warn(void **ptr, int size, char *name, int exitcode);
static void willus_mem_free(double **ptr, char *name);
static void sortd(double *x, int n);
static void sorti(int *x, int n);
static void bmp_init(WILLUSBITMAP *bmap);
static int bmp_alloc(WILLUSBITMAP *bmap);
static void bmp_free(WILLUSBITMAP *bmap);
static int bmp_copy(WILLUSBITMAP *dest, WILLUSBITMAP *src);
static void bmp_fill(WILLUSBITMAP *bmp,int r,int g,int b);
static int bmp_bytewidth(WILLUSBITMAP *bmp);
static unsigned char *bmp_rowptr_from_top(WILLUSBITMAP *bmp, int row);
static void bmp_more_rows(WILLUSBITMAP *bmp, double ratio, int pixval);
static int bmp_is_grayscale(WILLUSBITMAP *bmp);
static int bmp_resample(WILLUSBITMAP *dest, WILLUSBITMAP *src, double x1,
double y1, double x2, double y2, int newwidth, int newheight);
static void bmp_contrast_adjust(WILLUSBITMAP *dest,WILLUSBITMAP *src,double contrast);
static void bmp_convert_to_greyscale_ex(WILLUSBITMAP *dst, WILLUSBITMAP *src);
static int bmpmupdf_pixmap_to_bmp(WILLUSBITMAP *bmp, fz_context *ctx,
fz_pixmap *pixmap);
static void handle(int wait, ddjvu_context_t *ctx);
static MASTERINFO _masterinfo, *masterinfo;
static int master_bmp_inited = 0;
static int master_bmp_width = 0;
static int master_bmp_height = 0;
static int max_page_width_pix = 3000;
static int max_page_height_pix = 4000;
static double shrink_factor = 0.9;
static double zoom_value = 1.0;
static void k2pdfopt_reflow_bmp(MASTERINFO *masterinfo, WILLUSBITMAP *src) {
PAGEINFO _pageinfo, *pageinfo;
WILLUSBITMAP _srcgrey, *srcgrey;
int i, white, dpi;
double area_ratio;
masterinfo->debugfolder[0] = '\0';
white = src_whitethresh; /* Will be set by adjust_contrast() or set to src_whitethresh */
dpi = src_dpi;
adjust_params_init();
set_region_widths();
srcgrey = &_srcgrey;
if (master_bmp_inited == 0) {
bmp_init(&masterinfo->bmp);
master_bmp_inited = 1;
}
bmp_free(&masterinfo->bmp);
bmp_init(&masterinfo->bmp);
bmp_init(srcgrey);
wrapbmp_init();
int ii;
masterinfo->bmp.bpp = 8;
for (ii = 0; ii < 256; ii++)
masterinfo->bmp.red[ii] = masterinfo->bmp.blue[ii] =
masterinfo->bmp.green[ii] = ii;
masterinfo->rows = 0;
masterinfo->bmp.width = dst_width;
area_ratio = 8.5 * 11.0 * dst_dpi * dst_dpi / (dst_width * dst_height);
masterinfo->bmp.height = dst_height * area_ratio * 1.5;
bmp_alloc(&masterinfo->bmp);
bmp_fill(&masterinfo->bmp, 255, 255, 255);
BMPREGION region;
bmp_copy(srcgrey, src);
adjust_contrast(src, srcgrey, &white);
white_margins(src, srcgrey);
region.r1 = 0;
region.r2 = srcgrey->height - 1;
region.c1 = 0;
region.c2 = srcgrey->width - 1;
region.bgcolor = white;
region.bmp = src;
region.bmp8 = srcgrey;
masterinfo->bgcolor = white;
masterinfo->fit_to_page = dst_fit_to_page;
/* Check to see if master bitmap might need more room */
bmpregion_multicolumn_add(&region, masterinfo, 1, pageinfo, (int) (0.25 * src_dpi + .5));
master_bmp_width = masterinfo->bmp.width;
master_bmp_height = masterinfo->rows;
bmp_free(srcgrey);
}
void k2pdfopt_mupdf_reflow(fz_document *doc, fz_page *page, fz_context *ctx, \
double zoom, double gamma, double rot_deg) {
fz_device *dev;
fz_pixmap *pix;
fz_rect bounds,bounds2;
fz_matrix ctm;
fz_bbox bbox;
WILLUSBITMAP _src, *src;
double dpp;
double dpi = 250*zoom;
do {
dpp = dpi / 72.;
pix = NULL;
fz_var(pix);
bounds = fz_bound_page(doc, page);
ctm = fz_scale(dpp, dpp);
// ctm=fz_concat(ctm,fz_rotate(rotation));
bounds2 = fz_transform_rect(ctm, bounds);
bbox = fz_round_rect(bounds2);
printf("reading page:%d,%d,%d,%d dpi:%.0f\n",bbox.x0,bbox.y0,bbox.x1,bbox.y1,dpi);
zoom_value = zoom;
zoom *= shrink_factor;
dpi *= zoom;
} while (bbox.x1 > max_page_width_pix | bbox.y1 > max_page_height_pix);
// ctm=fz_translate(0,-page->mediabox.y1);
// ctm=fz_concat(ctm,fz_scale(dpp,-dpp));
// ctm=fz_concat(ctm,fz_rotate(page->rotate));
// ctm=fz_concat(ctm,fz_rotate(0));
// bbox=fz_round_rect(fz_transform_rect(ctm,page->mediabox));
// pix=fz_new_pixmap_with_rect(colorspace,bbox);
pix = fz_new_pixmap_with_bbox(ctx, fz_device_gray, bbox);
fz_clear_pixmap_with_value(ctx, pix, 0xff);
dev = fz_new_draw_device(ctx, pix);
#ifdef MUPDF_TRACE
fz_device *tdev;
fz_try(ctx) {
tdev = fz_new_trace_device(ctx);
fz_run_page(doc, page, tdev, ctm, NULL);
}
fz_always(ctx) {
fz_free_device(tdev);
}
#endif
fz_run_page(doc, page, dev, ctm, NULL);
fz_free_device(dev);
if(gamma >= 0.0) {
fz_gamma_pixmap(ctx, pix, gamma);
}
src = &_src;
masterinfo = &_masterinfo;
bmp_init(src);
int status = bmpmupdf_pixmap_to_bmp(src, ctx, pix);
k2pdfopt_reflow_bmp(masterinfo, src);
bmp_free(src);
fz_drop_pixmap(ctx, pix);
}
void k2pdfopt_djvu_reflow(ddjvu_page_t *page, ddjvu_context_t *ctx, \
ddjvu_render_mode_t mode, ddjvu_format_t *fmt, double zoom) {
WILLUSBITMAP _src, *src;
ddjvu_rect_t prect;
ddjvu_rect_t rrect;
int i, iw, ih, idpi, status;
double dpi = 250*zoom;
while (!ddjvu_page_decoding_done(page))
handle(1, ctx);
iw = ddjvu_page_get_width(page);
ih = ddjvu_page_get_height(page);
idpi = ddjvu_page_get_resolution(page);
prect.x = prect.y = 0;
do {
prect.w = iw * dpi / idpi;
prect.h = ih * dpi / idpi;
printf("reading page:%d,%d,%d,%d dpi:%.0f\n",prect.x,prect.y,prect.w,prect.h,dpi);
zoom_value = zoom;
zoom *= shrink_factor;
dpi *= zoom;
} while (prect.w > max_page_width_pix | prect.h > max_page_height_pix);
rrect = prect;
src = &_src;
masterinfo = &_masterinfo;
bmp_init(src);
src->width = prect.w = iw * dpi / idpi;
src->height = prect.h = ih * dpi / idpi;
src->bpp = 8;
rrect = prect;
bmp_alloc(src);
if (src->bpp == 8) {
int ii;
for (ii = 0; ii < 256; ii++)
src->red[ii] = src->blue[ii] = src->green[ii] = ii;
}
ddjvu_format_set_row_order(fmt, 1);
status = ddjvu_page_render(page, mode, &prect, &rrect, fmt,
bmp_bytewidth(src), (char *) src->data);
k2pdfopt_reflow_bmp(masterinfo, src);
bmp_free(src);
}
void k2pdfopt_rfbmp_size(int *width, int *height) {
*width = master_bmp_width;
*height = master_bmp_height;
}
void k2pdfopt_rfbmp_ptr(unsigned char** bmp_ptr_ptr) {
*bmp_ptr_ptr = masterinfo->bmp.data;
}
void k2pdfopt_rfbmp_zoom(double *zoom) {
*zoom = zoom_value;
}
/* ansi.c */
#define MAXSIZE 8000
static int ansi_on=1;
static char ansi_buffer[MAXSIZE];
int avprintf(FILE *f, char *fmt, va_list args)
{
int status;
{
if (!ansi_on) {
status = vsprintf(ansi_buffer, fmt, args);
ansi_parse(f, ansi_buffer);
} else
status = vfprintf(f, fmt, args);
}
return (status);
}
int aprintf(char *fmt, ...)
{
va_list args;
int status;
va_start(args, fmt);
status = avprintf(stdout, fmt, args);
va_end(args);
return (status);
}
/*
** Ensure that max_region_width_inches will be > MIN_REGION_WIDTH_INCHES
**
** Should only be called once, after all params are set.
**
*/
void adjust_params_init(void)
{
if (dst_landscape) {
dst_width = dst_userheight;
dst_height = dst_userwidth;
} else {
dst_width = dst_userwidth;
dst_height = dst_userheight;
}
if (dst_mar < 0.)
dst_mar = 0.02;
if (dst_martop < 0.)
dst_martop = dst_mar;
if (dst_marbot < 0.)
dst_marbot = dst_mar;
if (dst_marleft < 0.)
dst_marleft = dst_mar;
if (dst_marright < 0.)
dst_marright = dst_mar;
if ((double) dst_width / dst_dpi - dst_marleft
- dst_marright< MIN_REGION_WIDTH_INCHES) {
int olddpi;
olddpi = dst_dpi;
dst_dpi = (int) ((double) dst_width
/ (MIN_REGION_WIDTH_INCHES + dst_marleft + dst_marright));
aprintf(
TTEXT_BOLD2 "Output DPI of %d is too large. Reduced to %d." TTEXT_NORMAL "\n\n",
olddpi, dst_dpi);
}
}
void set_region_widths(void)
{
max_region_width_inches = display_width_inches = (double) dst_width
/ dst_dpi;
max_region_width_inches -= (dst_marleft + dst_marright);
/* This is ensured by adjust_dst_dpi() as of v1.17 */
/*
if (max_region_width_inches < MIN_REGION_WIDTH_INCHES)
max_region_width_inches = MIN_REGION_WIDTH_INCHES;
*/
}
/*
** Process full source page bitmap into rectangular regions and add
** to the destination bitmap. Start by looking for columns.
**
** level = recursion level. First call = 1, then 2, ...
**
*/
void bmpregion_multicolumn_add(BMPREGION *region, MASTERINFO *masterinfo,
int level, PAGEINFO *pageinfo, int colgap0_pixels)
{
static char *funcname = "bmpregion_multicolumn_add";
int *row_black_count;
int r2, rh, r0, cgr, maxlevel;
BMPREGION *srcregion, _srcregion;
BMPREGION *newregion, _newregion;
BMPREGION *pageregion;
double minh;
int ipr, npr, na;
int *colcount, *rowcount;
willus_dmem_alloc_warn(1, (void **) &colcount,
sizeof(int) * (region->c2 + 1), funcname, 10);
willus_dmem_alloc_warn(2, (void **) &rowcount,
sizeof(int) * (region->r2 + 1), funcname, 10);
maxlevel = max_columns / 2;
if (debug)
printf("@bmpregion_multicolumn_add (%d,%d) - (%d,%d) lev=%d\n",
region->c1, region->r1, region->c2, region->r2, level);
newregion = &_newregion;
(*newregion) = (*region);
/* Establish colcount, rowcount arrays */
bmpregion_trim_margins(newregion, colcount, rowcount, 0xf);
(*newregion) = (*region);
srcregion = &_srcregion;
(*srcregion) = (*region);
/* How many page regions do we need? */
minh = min_column_height_inches;
if (minh < .01)
minh = .1;
na = (srcregion->r2 - srcregion->r1 + 1) / src_dpi / minh;
if (na < 1)
na = 1;
na += 16;
/* Allocate page regions */
willus_dmem_alloc_warn(3, (void **) &pageregion, sizeof(BMPREGION) * na,
funcname, 10);
#ifdef COMMENT
mindr=src_dpi*.045; /* src->height/250; */
if (mindr<1)
mindr=1;
#endif
// white=250;
// for (i=0;i<src->width;i++)
// colcount[i]=0;
if (debug)
bmpregion_row_histogram(region);
/*
** Store information about which rows are mostly clear for future
** processing (saves processing time).
*/
willus_dmem_alloc_warn(4, (void **) &row_black_count,
region->bmp8->height * sizeof(int), funcname, 10);
for (cgr = 0, r0 = 0; r0 < region->bmp8->height; r0++) {
row_black_count[r0] = bmpregion_row_black_count(region, r0);
if (row_black_count[r0] == 0)
cgr++;
/*
int dr;
dr=mindr;
if (r0+dr>region->bmp8->height)
dr=region->bmp8->height-r0;
if ((row_is_clear[r0]=bmpregion_row_mostly_white(region,r0,dr))!=0)
cgr++;
*/
// printf("row_is_clear[%d]=%d\n",r0,row_is_clear[r0]);
}
if (verbose)
printf("%d clear rows.\n", cgr);
if (max_columns == 1) {
pageregion[0] = (*srcregion);
/* Set c1 negative to indicate full span */
pageregion[0].c1 = -1 - pageregion[0].c1;
npr = 1;
} else
/* Find all column dividers in source region and store sequentially in pageregion[] array */
for (npr = 0, rh = 0; srcregion->r1 <= srcregion->r2; srcregion->r1 +=
rh) {
static char *ierr =
TTEXT_WARN "\n\aInternal error--not enough allocated regions.\n"
"Please inform the developer at willus.com.\n\n" TTEXT_NORMAL;
if (npr >= na - 3) {
aprintf("%s", ierr);
break;
}
rh = bmpregion_find_multicolumn_divider(srcregion, row_black_count,
pageregion, &npr, colcount, rowcount);
if (verbose)
printf("rh=%d/%d\n", rh, region->r2 - region->r1 + 1);
}
/* Process page regions by column */
if (debug)
printf("Page regions: %d\n", npr);
r2 = -1;
for (ipr = 0; ipr < npr;) {
int r20, jpr, colnum, colgap_pixels;
for (colnum = 1; colnum <= 2; colnum++) {
if (debug) {
printf("ipr = %d of %d...\n", ipr, npr);
printf("COLUMN %d...\n", colnum);
}
r20 = r2;
for (jpr = ipr; jpr < npr; jpr += 2) {
/* If we get to a page region that spans the entire source, stop */
if (pageregion[jpr].c1 < 0)
break;
/* See if we should suspend this column and start displaying the next one */
if (jpr > ipr) {
double cpdiff, cdiv1, cdiv2, rowgap1_in, rowgap2_in;
if (column_offset_max < 0.)
break;
/* Did column divider move too much? */
cdiv1 = (pageregion[jpr].c2 + pageregion[jpr + 1].c1) / 2.;
cdiv2 = (pageregion[jpr - 2].c2 + pageregion[jpr - 1].c1)
/ 2.;
cpdiff = fabs(
(double) (cdiv1 - cdiv2)
/ (srcregion->c2 - srcregion->c1 + 1));
if (cpdiff > column_offset_max)
break;
/* Is gap between this column region and next column region too big? */
rowgap1_in = (double) (pageregion[jpr].r1
- pageregion[jpr - 2].r2) / src_dpi;
rowgap2_in = (double) (pageregion[jpr + 1].r1
- pageregion[jpr - 1].r2) / src_dpi;
if (rowgap1_in > 0.28 && rowgap2_in > 0.28)
break;
}
(*newregion) = pageregion[
src_left_to_right ?
jpr + colnum - 1 : jpr + (2 - colnum)];
/* Preserve vertical gap between this region and last region */
if (r20 >= 0 && newregion->r1 - r20 >= 0)
colgap_pixels = newregion->r1 - r20;
else
colgap_pixels = colgap0_pixels;
if (level < maxlevel)
bmpregion_multicolumn_add(newregion, masterinfo, level + 1,
pageinfo, colgap_pixels);
else {
bmpregion_vertically_break(newregion, masterinfo, text_wrap,
fit_columns ? -2.0 : -1.0, colcount, rowcount,
pageinfo, colgap_pixels, 2 * level);
}
r20 = newregion->r2;
}
if (r20 > r2)
r2 = r20;
if (jpr == ipr)
break;
}
if (jpr < npr && pageregion[jpr].c1 < 0) {
if (debug)
printf("SINGLE COLUMN REGION...\n");
(*newregion) = pageregion[jpr];
newregion->c1 = -1 - newregion->c1;
/* dst_add_gap_src_pixels("Col level",masterinfo,newregion->r1-r2); */
colgap_pixels = newregion->r1 - r2;
bmpregion_vertically_break(newregion, masterinfo, text_wrap,
(fit_columns && (level > 1)) ? -2.0 : -1.0, colcount,
rowcount, pageinfo, colgap_pixels, level);
r2 = newregion->r2;
jpr++;
}
ipr = jpr;
}
willus_dmem_free(4, (double **) &row_black_count, funcname);
willus_dmem_free(3, (double **) &pageregion, funcname);
willus_dmem_free(2, (double **) &rowcount, funcname);
willus_dmem_free(1, (double **) &colcount, funcname);
}
static void fit_column_to_screen(double column_width_inches)
{
double text_width_pixels, lm_pixels, rm_pixels, tm_pixels, bm_pixels;
if (!column_fitted) {
dpi_org = dst_dpi;
lm_org = dst_marleft;
rm_org = dst_marright;
tm_org = dst_martop;
bm_org = dst_marbot;
}
text_width_pixels = max_region_width_inches * dst_dpi;
lm_pixels = dst_marleft * dst_dpi;
rm_pixels = dst_marright * dst_dpi;
tm_pixels = dst_martop * dst_dpi;
bm_pixels = dst_marbot * dst_dpi;
dst_dpi = text_width_pixels / column_width_inches;
dst_marleft = lm_pixels / dst_dpi;
dst_marright = rm_pixels / dst_dpi;
dst_martop = tm_pixels / dst_dpi;
dst_marbot = bm_pixels / dst_dpi;
set_region_widths();
column_fitted = 1;
}
static void restore_output_dpi(void)
{
if (column_fitted) {
dst_dpi = dpi_org;
dst_marleft = lm_org;
dst_marright = rm_org;
dst_martop = tm_org;
dst_marbot = bm_org;
set_region_widths();
}
column_fitted = 0;
}
void adjust_contrast(WILLUSBITMAP *src, WILLUSBITMAP *srcgrey, int *white)
{
int i, j, tries, wc, tc, hist[256];
double contrast, rat0;
WILLUSBITMAP *dst, _dst;
if (debug && verbose)
printf("\nAt adjust_contrast.\n");
if ((*white) <= 0)
(*white) = 192;
/* If contrast_max negative, use it as fixed contrast adjustment. */
if (contrast_max < 0.) {
bmp_contrast_adjust(srcgrey, srcgrey, -contrast_max);
if (dst_color && fabs(contrast_max + 1.0) > 1e-4)
bmp_contrast_adjust(src, src, -contrast_max);
return;
}
dst = &_dst;
bmp_init(dst);
wc = 0; /* Avoid compiler warning */
tc = srcgrey->width * srcgrey->height;
rat0 = 0.5; /* Avoid compiler warning */
for (contrast = 1.0, tries = 0; contrast < contrast_max + .01; tries++) {
if (fabs(contrast - 1.0) > 1e-4)
bmp_contrast_adjust(dst, srcgrey, contrast);
else
bmp_copy(dst, srcgrey);
/*Get bitmap histogram */
for (i = 0; i < 256; i++)
hist[i] = 0;
for (j = 0; j < dst->height; j++) {
unsigned char *p;
p = bmp_rowptr_from_top(dst, j);
for (i = 0; i < dst->width; i++, p++)
hist[p[0]]++;
}
if (tries == 0) {
int h1;
for (h1 = 0, j = (*white); j < 256; j++)
h1 += hist[j];
rat0 = (double) h1 / tc;
if (debug && verbose)
printf(" rat0 = rat[%d-255]=%.4f\n", (*white), rat0);
}
/* Find white ratio */
/*
for (wc=hist[254],j=253;j>=252;j--)
if (hist[j]>wc1)
wc1=hist[j];
*/
for (wc = 0, j = 252; j <= 255; j++)
wc += hist[j];
/*
if ((double)wc/tc >= rat0*0.7 && (double)hist[255]/wc > 0.995)
break;
*/
if (debug && verbose)
printf(" %2d. Contrast=%7.2f, rat[252-255]/rat0=%.4f\n",
tries + 1, contrast, (double) wc / tc / rat0);
if ((double) wc / tc >= rat0 * 0.94)
break;
contrast *= 1.05;
}
if (debug)
printf("Contrast=%7.2f, rat[252-255]/rat0=%.4f\n", contrast,
(double) wc / tc / rat0);
/*
bmp_write(dst,"outc.png",stdout,100);
wfile_written_info("outc.png",stdout);
exit(10);
*/
bmp_copy(srcgrey, dst);
/* Maybe don't adjust the contrast for the color bitmap? */
if (dst_color && fabs(contrast - 1.0) > 1e-4)
bmp_contrast_adjust(src, src, contrast);
bmp_free(dst);
}
static int bmpregion_row_black_count(BMPREGION *region, int r0)
{
unsigned char *p;
int i, nc, c;
p = bmp_rowptr_from_top(region->bmp8, r0) + region->c1;
nc = region->c2 - region->c1 + 1;
for (c = i = 0; i < nc; i++, p++)
if (p[0] < region->bgcolor)
c++;
return (c);
}
/*
** Returns height of region found and divider position in (*divider_column).
** (*divider_column) is absolute position on source bitmap.
**
*/
static int bmpregion_find_multicolumn_divider(BMPREGION *region,
int *row_black_count, BMPREGION *pageregion, int *npr, int *colcount,
int *rowcount)
{
int itop, i, dm, middle, divider_column, min_height_pixels, mhp2,
min_col_gap_pixels;
BMPREGION _newregion, *newregion, column[2];
BREAKINFO *breakinfo, _breakinfo;
int *rowmin, *rowmax;
static char *funcname = "bmpregion_find_multicolumn_divider";
if (debug)
printf("@bmpregion_find_multicolumn_divider(%d,%d)-(%d,%d)\n",
region->c1, region->r1, region->c2, region->r2);
breakinfo = &_breakinfo;
breakinfo->textrow = NULL;
breakinfo_alloc(101, breakinfo, region->r2 - region->r1 + 1);
bmpregion_find_vertical_breaks(region, breakinfo, colcount, rowcount,
column_row_gap_height_in);
/*
{
printf("region (%d,%d)-(%d,%d) has %d breaks:\n",region->c1,region->r1,region->c2,region->r2,breakinfo->n);
for (i=0;i<breakinfo->n;i++)
printf(" Rows %d - %d\n",breakinfo->textrow[i].r1,breakinfo->textrow[i].r2);
}
*/
newregion = &_newregion;
(*newregion) = (*region);
min_height_pixels = min_column_height_inches * src_dpi; /* src->height/15; */
mhp2 = min_height_pixels - 1;
if (mhp2 < 0)
mhp2 = 0;
dm = 1 + (region->c2 - region->c1 + 1) * column_gap_range / 2.;
middle = (region->c2 - region->c1 + 1) / 2;
min_col_gap_pixels = (int) (min_column_gap_inches * src_dpi + .5);
if (verbose) {
printf("(dm=%d, width=%d, min_gap=%d)\n", dm,
region->c2 - region->c1 + 1, min_col_gap_pixels);
printf("Checking regions (r1=%d, r2=%d, minrh=%d)..", region->r1,
region->r2, min_height_pixels);
fflush(stdout);
}
breakinfo_sort_by_row_position(breakinfo);
willus_dmem_alloc_warn(5, (void **) &rowmin,
(region->c2 + 10) * 2 * sizeof(int), funcname, 10);
rowmax = &rowmin[region->c2 + 10];
for (i = 0; i < region->c2 + 2; i++) {
rowmin[i] = region->r2 + 2;
rowmax[i] = -1;
}
/* Start with top-most and bottom-most regions, look for column dividers */
for (itop = 0;
itop < breakinfo->n
&& breakinfo->textrow[itop].r1
< region->r2 + 1 - min_height_pixels; itop++) {
int ibottom;
for (ibottom = breakinfo->n - 1;
ibottom >= itop
&& breakinfo->textrow[ibottom].r2
- breakinfo->textrow[itop].r1
>= min_height_pixels; ibottom--) {
/*
** Look for vertical shaft of clear space that clearly demarcates
** two columns
*/
for (i = 0; i < dm; i++) {
int foundgap, ii, c1, c2, iiopt, status;
newregion->c1 = region->c1 + middle - i;
/* If we've effectively already checked this shaft, move on */
if (itop >= rowmin[newregion->c1]
&& ibottom <= rowmax[newregion->c1])
continue;
newregion->c2 = newregion->c1 + min_col_gap_pixels - 1;
newregion->r1 = breakinfo->textrow[itop].r1;
newregion->r2 = breakinfo->textrow[ibottom].r2;
foundgap = bmpregion_is_clear(newregion, row_black_count,
gtc_in);
if (!foundgap && i > 0) {
newregion->c1 = region->c1 + middle + i;
newregion->c2 = newregion->c1 + min_col_gap_pixels - 1;
foundgap = bmpregion_is_clear(newregion, row_black_count,
gtc_in);
}
if (!foundgap)
continue;
/* Found a gap, but look for a better gap nearby */
c1 = newregion->c1;
c2 = newregion->c2;
for (iiopt = 0, ii = -min_col_gap_pixels;
ii <= min_col_gap_pixels; ii++) {
int newgap;
newregion->c1 = c1 + ii;
newregion->c2 = c2 + ii;
newgap = bmpregion_is_clear(newregion, row_black_count,
gtc_in);
if (newgap > 0 && newgap < foundgap) {
iiopt = ii;
foundgap = newgap;
if (newgap == 1)
break;
}
}
newregion->c1 = c1 + iiopt;
/* If we've effectively already checked this shaft, move on */
if (itop >= rowmin[newregion->c1]
&& ibottom <= rowmax[newregion->c1])
continue;
newregion->c2 = c2 + iiopt;
divider_column = newregion->c1 + min_col_gap_pixels / 2;
status = bmpregion_column_height_and_gap_test(column, region,
breakinfo->textrow[itop].r1,
breakinfo->textrow[ibottom].r2, divider_column,
colcount, rowcount);
/* If fails column height or gap test, mark as bad */
if (status) {
if (itop < rowmin[newregion->c1])
rowmin[newregion->c1] = itop;
if (ibottom > rowmax[newregion->c1])
rowmax[newregion->c1] = ibottom;
}
/* If right column too short, stop looking */
if (status & 2)
break;
if (!status) {
int colheight;
/* printf(" GOT COLUMN DIVIDER AT x=%d.\n",(*divider_column)); */
if (verbose) {
printf("\n GOOD REGION: col gap=(%d,%d) - (%d,%d)\n"
" r1=%d, r2=%d\n",
newregion->c1, newregion->r1, newregion->c2,
newregion->r2, breakinfo->textrow[itop].r1,
breakinfo->textrow[ibottom].r2);
}
if (itop > 0) {
/* add 1-column region */
pageregion[(*npr)] = (*region);
pageregion[(*npr)].r2 = breakinfo->textrow[itop - 1].r2;
if (pageregion[(*npr)].r2
> pageregion[(*npr)].bmp8->height - 1)
pageregion[(*npr)].r2 =
pageregion[(*npr)].bmp8->height - 1;
bmpregion_trim_margins(&pageregion[(*npr)], colcount,
rowcount, 0xf);
/* Special flag to indicate full-width region */
pageregion[(*npr)].c1 = -1 - pageregion[(*npr)].c1;
(*npr) = (*npr) + 1;
}
pageregion[(*npr)] = column[0];
(*npr) = (*npr) + 1;
pageregion[(*npr)] = column[1];
(*npr) = (*npr) + 1;
colheight = breakinfo->textrow[ibottom].r2 - region->r1 + 1;
breakinfo_free(101, breakinfo);
/*
printf("Returning %d divider column = %d - %d\n",region->r2-region->r1+1,newregion->c1,newregion->c2);
*/
return (colheight);
}
}
}
}
if (verbose)
printf("NO GOOD REGION FOUND.\n");
pageregion[(*npr)] = (*region);
bmpregion_trim_margins(&pageregion[(*npr)], colcount, rowcount, 0xf);
/* Special flag to indicate full-width region */
pageregion[(*npr)].c1 = -1 - pageregion[(*npr)].c1;
(*npr) = (*npr) + 1;
/* (*divider_column)=region->c2+1; */
willus_dmem_free(5, (double **) &rowmin, funcname);
breakinfo_free(101, breakinfo);
/*
printf("Returning %d\n",region->r2-region->r1+1);
*/
return (region->r2 - region->r1 + 1);
}
/*
** 1 = column 1 too short
** 2 = column 2 too short
** 3 = both too short
** 0 = both okay
** Both columns must pass height requirement.
**
** Also, if gap between columns > max_column_gap_inches, fails test. (8-31-12)
**
*/
static int bmpregion_column_height_and_gap_test(BMPREGION *column,
BMPREGION *region, int r1, int r2, int cmid, int *colcount,
int *rowcount)
{
int min_height_pixels, status;
status = 0;
min_height_pixels = min_column_height_inches * src_dpi;
column[0] = (*region);
column[0].r1 = r1;
column[0].r2 = r2;
column[0].c2 = cmid - 1;
bmpregion_trim_margins(&column[0], colcount, rowcount, 0xf);
/*
printf(" COL1: pix=%d (%d - %d)\n",newregion->r2-newregion->r1+1,newregion->r1,newregion->r2);
*/
if (column[0].r2 - column[0].r1 + 1 < min_height_pixels)
status |= 1;
column[1] = (*region);
column[1].r1 = r1;
column[1].r2 = r2;
column[1].c1 = cmid;
column[1].c2 = region->c2;
bmpregion_trim_margins(&column[1], colcount, rowcount, 0xf);
/*
printf(" COL2: pix=%d (%d - %d)\n",newregion->r2-newregion->r1+1,newregion->r1,newregion->r2);
*/
if (column[1].r2 - column[1].r1 + 1 < min_height_pixels)
status |= 2;
/* Make sure gap between columns is not too large */
if (max_column_gap_inches >= 0.
&& column[1].c1 - column[0].c2 - 1
> max_column_gap_inches * src_dpi)
status |= 4;
return (status);
}
/*
** Return 0 if there are dark pixels in the region. NZ otherwise.
*/
static int bmpregion_is_clear(BMPREGION *region, int *row_black_count,
double gt_in)
{
int r, c, nc, pt;
/*
** row_black_count[] doesn't necessarily match up to this particular region's columns.
** So if row_black_count[] == 0, the row is clear, otherwise it has to be counted.
** because the columns are a subset.
*/
/* nr=region->r2-region->r1+1; */
nc = region->c2 - region->c1 + 1;
pt = (int) (gt_in * src_dpi * nc + .5);
if (pt < 0)
pt = 0;
for (c = 0, r = region->r1; r <= region->r2; r++) {
if (r < 0 || r >= region->bmp8->height)
continue;
if (row_black_count[r] == 0)
continue;
c += bmpregion_row_black_count(region, r);
if (c > pt)
return (0);
}
/*
printf("(%d,%d)-(%d,%d): c=%d, pt=%d (gt_in=%g)\n",
region->c1,region->r1,region->c2,region->r2,c,pt,gt_in);
*/
return (1 + (int) 10 * c / pt);
}
static void bmpregion_row_histogram(BMPREGION *region)
{
static char *funcname = "bmpregion_row_histogram";
WILLUSBITMAP *src;
FILE *out;
static int *rowcount;
static int *hist;
int i, j, nn;
willus_dmem_alloc_warn(6, (void **) &rowcount,
(region->r2 - region->r1 + 1) * sizeof(int), funcname, 10);
willus_dmem_alloc_warn(7, (void **) &hist,
(region->c2 - region->c1 + 1) * sizeof(int), funcname, 10);
src = region->bmp8;
for (j = region->r1; j <= region->r2; j++) {
unsigned char *p;
p = bmp_rowptr_from_top(src, j) + region->c1;
rowcount[j - region->r1] = 0;
for (i = region->c1; i <= region->c2; i++, p++)
if (p[0] < region->bgcolor)
rowcount[j - region->r1]++;
}
for (i = region->c1; i <= region->c2; i++)
hist[i - region->c1] = 0;
for (i = region->r1; i <= region->r2; i++)
hist[rowcount[i - region->r1]]++;
for (i = region->c2 - region->c1 + 1; i >= 0; i--)
if (hist[i] > 0)
break;
nn = i;
out = fopen("hist.ep", "w");
for (i = 0; i <= nn; i++)
fprintf(out, "%5d %5d\n", i, hist[i]);
fclose(out);
out = fopen("rowcount.ep", "w");
for (i = 0; i < region->r2 - region->r1 + 1; i++)
fprintf(out, "%5d %5d\n", i, rowcount[i]);
fclose(out);
willus_dmem_free(7, (double **) &hist, funcname);
willus_dmem_free(6, (double **) &rowcount, funcname);
}
/*
** Mark the region
** mark_flags & 1 : Mark top
** mark_flags & 2 : Mark bottom
** mark_flags & 4 : Mark left
** mark_flags & 8 : Mark right
**
*/
static void mark_source_page(BMPREGION *region0, int caller_id, int mark_flags)
{
static int display_order = 0;
int i, n, nn, fontsize, r, g, b, shownum;
char num[16];
BMPREGION *region, _region;
BMPREGION *clip, _clip;
if (!show_marked_source)
return;
if (region0 == NULL) {
display_order = 0;
return;
}
region = &_region;
(*region) = (*region0);
/* Clip the region w/ignored margins */
clip = &_clip;
clip->bmp = region0->bmp;
get_white_margins(clip);
if (region->c1 < clip->c1)
region->c1 = clip->c1;
if (region->c2 > clip->c2)
region->c2 = clip->c2;
if (region->r1 < clip->r1)
region->r1 = clip->r1;
if (region->r2 > clip->r2)
region->r2 = clip->r2;
if (region->r2 <= region->r1 || region->c2 <= region->c1)
return;
/* printf("@mark_source_page(display_order=%d)\n",display_order); */
if (caller_id == 1) {
display_order++;
shownum = 1;
n = (int) (src_dpi / 60. + 0.5);
if (n < 5)
n = 5;
r = 255;
g = b = 0;
} else if (caller_id == 2) {
shownum = 0;
n = 2;
r = 0;
g = 0;
b = 255;
} else if (caller_id == 3) {
shownum = 0;
n = (int) (src_dpi / 80. + 0.5);
if (n < 4)
n = 4;
r = 0;
g = 255;
b = 0;
} else if (caller_id == 4) {
shownum = 0;
n = 2;
r = 255;
g = 0;
b = 255;
} else {
shownum = 0;
n = 2;
r = 140;
g = 140;
b = 140;
}
if (n < 2)
n = 2;
nn = (region->c2 + 1 - region->c1) / 2;
if (n > nn)
n = nn;
nn = (region->r2 + 1 - region->r1) / 2;
if (n > nn)
n = nn;
if (n < 1)
n = 1;
for (i = 0; i < n; i++) {
int j;
unsigned char *p;
if (mark_flags & 1) {
p = bmp_rowptr_from_top(region->marked, region->r1 + i)
+ region->c1 * 3;
for (j = region->c1; j <= region->c2; j++, p += 3) {
p[0] = r;
p[1] = g;
p[2] = b;
}
}
if (mark_flags & 2) {
p = bmp_rowptr_from_top(region->marked, region->r2 - i)
+ region->c1 * 3;
for (j = region->c1; j <= region->c2; j++, p += 3) {
p[0] = r;
p[1] = g;
p[2] = b;
}
}
if (mark_flags & 16) /* rowbase */
{
p = bmp_rowptr_from_top(region->marked, region->rowbase - i)
+ region->c1 * 3;
for (j = region->c1; j <= region->c2; j++, p += 3) {
p[0] = r;
p[1] = g;
p[2] = b;
}
}
if (mark_flags & 4)
for (j = region->r1; j <= region->r2; j++) {
p = bmp_rowptr_from_top(region->marked, j)
+ (region->c1 + i) * 3;
p[0] = r;
p[1] = g;
p[2] = b;
}
if (mark_flags & 8)
for (j = region->r1; j <= region->r2; j++) {
p = bmp_rowptr_from_top(region->marked, j)
+ (region->c2 - i) * 3;
p[0] = r;
p[1] = g;
p[2] = b;
}
}
if (!shownum)
return;
fontsize = region->c2 - region->c1 + 1;
if (fontsize > region->r2 - region->r1 + 1)
fontsize = region->r2 - region->r1 + 1;
fontsize /= 2;
if (fontsize > src_dpi)
fontsize = src_dpi;
if (fontsize < 5)
return;
fontrender_set_typeface("helvetica-bold");
fontrender_set_fgcolor(r, g, b);
fontrender_set_bgcolor(255, 255, 255);
fontrender_set_pixel_size(fontsize);
fontrender_set_justification(4);
fontrender_set_or(1);
sprintf(num, "%d", display_order);
fontrender_render(region->marked, (double) (region->c1 + region->c2) / 2.,
(double) (region->marked->height - ((region->r1 + region->r2) / 2.)),
num, 0, NULL);
/* printf(" done mark_source_page.\n"); */
}
/*
** Input: A generic rectangular region from the source file. It will not
** be checked for multiple columns, but the text may be wrapped
** (controlled by allow_text_wrapping input).
**
** force_scale == -2 : Use same scale for entire column--fit to device
**
** This function looks for vertical gaps in the region and breaks it at
** the widest ones (if there are significantly wider ones).
**
*/
static void bmpregion_vertically_break(BMPREGION *region,
MASTERINFO *masterinfo, int allow_text_wrapping, double force_scale,
int *colcount, int *rowcount, PAGEINFO *pageinfo, int colgap_pixels,
int ncols)
{
static int ncols_last = -1;
int regcount, i, i1, biggap, revert, trim_flags, allow_vertical_breaks;
int justification_flags, caller_id, marking_flags, rbdelta;
// int trim_left_and_right;
BMPREGION *bregion, _bregion;
BREAKINFO *breakinfo, _breakinfo;
double region_width_inches, region_height_inches;
#if (WILLUSDEBUGX & 1)
printf("\n\n@bmpregion_vertically_break. colgap_pixels=%d\n\n",colgap_pixels);
#endif
trim_flags = 0xf;
allow_vertical_breaks = 1;
justification_flags = 0x8f; /* Don't know region justification status yet. Use user settings. */
rbdelta = -1;
breakinfo = &_breakinfo;
breakinfo->textrow = NULL;
breakinfo_alloc(102, breakinfo, region->r2 - region->r1 + 1);
bmpregion_find_vertical_breaks(region, breakinfo, colcount, rowcount, -1.0);
/* Should there be a check for breakinfo->n==0 here? */
/* Don't think it breaks anything to let it go. -- 6-11-12 */
#if (WILLUSDEBUGX & 2)
breakinfo_echo(breakinfo);
#endif
breakinfo_remove_small_rows(breakinfo, 0.25, 0.5, region, colcount,
rowcount);
#if (WILLUSDEBUGX & 2)
breakinfo_echo(breakinfo);
#endif
breakinfo->centered = bmpregion_is_centered(region, breakinfo, 0,
breakinfo->n - 1, NULL);
#if (WILLUSDEBUGX & 2)
breakinfo_echo(breakinfo);
#endif
/*
newregion=&_newregion;
for (i=0;i<breakinfo->n;i++)
{
(*newregion)=(*region);
newregion->r1=breakinfo->textrow[i].r1;
newregion->r2=breakinfo->textrow[i].r2;
bmpregion_add(newregion,breakinfo,masterinfo,allow_text_wrapping,force_scale,0,1,
colcount,rowcount,pageinfo,0,0xf);
}
breakinfo_free(breakinfo);
return;
*/
/*
if (!vertical_breaks)
{
caller_id=100;
marking_flags=0;
bmpregion_add(region,breakinfo,masterinfo,allow_text_wrapping,trim_flags,
allow_vertical_breaks,force_scale,justification_flags,
caller_id,colcount,rowcount,pageinfo,marking_flags,rbdelta);
breakinfo_free(breakinfo);
return;
}
*/
/* Red, numbered region */
mark_source_page(region, 1, 0xf);
bregion = &_bregion;
if (debug) {
if (!allow_text_wrapping)
printf(
"@bmpregion_vertically_break (no break) (%d,%d) - (%d,%d) (scale=%g)\n",
region->c1, region->r1, region->c2, region->r2,
force_scale);
else
printf(
"@bmpregion_vertically_break (allow break) (%d,%d) - (%d,%d) (scale=%g)\n",
region->c1, region->r1, region->c2, region->r2,
force_scale);
}
/*
** Tag blank rows and columns
*/
if (vertical_break_threshold < 0. || breakinfo->n < 6)
biggap = -1.;
else {
int gap_median;
/*
int rowheight_median;
breakinfo_sort_by_rowheight(breakinfo);
rowheight_median = breakinfo->textrow[breakinfo->n/2].rowheight;
*/
#ifdef WILLUSDEBUG
for (i=0;i<breakinfo->n;i++)
printf(" gap[%d]=%d\n",i,breakinfo->textrow[i].gap);
#endif
breakinfo_sort_by_gap(breakinfo);
gap_median = breakinfo->textrow[breakinfo->n / 2].gap;
#ifdef WILLUSDEBUG
printf(" median=%d\n",gap_median);
#endif
biggap = gap_median * vertical_break_threshold;
breakinfo_sort_by_row_position(breakinfo);
}
#ifdef WILLUSDEBUG
printf(" biggap=%d\n",biggap);
#endif
region_width_inches = (double) (region->c2 - region->c1 + 1) / src_dpi;
region_height_inches = (double) (region->r2 - region->r1 + 1) / src_dpi;
/*
trim_left_and_right = 1;
if (region_width_inches <= max_region_width_inches)
trim_left_and_right = 0;
*/
/*
printf("force_scale=%g, rwi = %g, rwi/mrwi = %g, rhi = %g\n",
force_scale,
region_width_inches,
region_width_inches / max_region_width_inches,
region_height_inches);
*/
if (force_scale < -1.5 && region_width_inches > MIN_REGION_WIDTH_INCHES
&& region_width_inches / max_region_width_inches < 1.25
&& region_height_inches > 0.5) {
revert = 1;
force_scale = -1.0;
fit_column_to_screen(region_width_inches);
// trim_left_and_right = 0;
allow_text_wrapping = 0;
} else
revert = 0;
/* Add the regions (broken vertically) */
caller_id = 1;
/*
if (trim_left_and_right)
trim_flags=0xf;
else
trim_flags=0xc;
*/
trim_flags = 0xf;
for (regcount = i1 = i = 0; i1 < breakinfo->n; i++) {
int i2;
i2 = i < breakinfo->n ? i : breakinfo->n - 1;
if (i >= breakinfo->n
|| (biggap > 0. && breakinfo->textrow[i2].gap >= biggap)) {
int j, c1, c2, nc, nowrap;
double regwidth, ar1, rh1;
// printf("CALLER 1: i1=%d, i2=%d (breakinfo->n=%d)\n",i1,i2,breakinfo->n);
(*bregion) = (*region);
bregion->r1 = breakinfo->textrow[i1].r1;
bregion->r2 = breakinfo->textrow[i2].r2;
c1 = breakinfo->textrow[i1].c1;
c2 = breakinfo->textrow[i1].c2;
nc = c2 - c1 + 1;
if (nc <= 0)
nc = 1;
rh1 = (double) (breakinfo->textrow[i1].r2
- breakinfo->textrow[i1].r1 + 1) / src_dpi;
ar1 = (double) (breakinfo->textrow[i1].r2
- breakinfo->textrow[i1].r1 + 1) / nc;
for (j = i1 + 1; j <= i2; j++) {
if (c1 > breakinfo->textrow[j].c1)
c1 = breakinfo->textrow[j].c1;
if (c2 < breakinfo->textrow[j].c2)
c2 = breakinfo->textrow[j].c2;
}
regwidth = (double) (c2 - c1 + 1) / src_dpi;
marking_flags = (i1 == 0 ? 0 : 1)
| (i2 == breakinfo->n - 1 ? 0 : 2);
/* Green */
mark_source_page(bregion, 3, marking_flags);
nowrap = ((regwidth <= max_region_width_inches
&& allow_text_wrapping < 2)
|| (ar1 > no_wrap_ar_limit
&& rh1 > no_wrap_height_limit_inches));
/*
** If between regions, or if the next region isn't going to be
** wrapped, or if the next region starts a different number of
** columns than before, then "flush and gap."
*/
if (regcount > 0 || just_flushed_internal || nowrap
|| (ncols_last > 0 && ncols_last != ncols)) {
int gap;
#ifdef WILLUSDEBUG
printf("wrapflush1\n");
#endif
if (!just_flushed_internal)
wrapbmp_flush(masterinfo, 0, pageinfo, 0);
gap = regcount == 0 ?
colgap_pixels : breakinfo->textrow[i1 - 1].gap;
if (regcount == 0 && beginning_gap_internal > 0) {
if (last_h5050_internal > 0) {
if (fabs(
1.
- (double) breakinfo->textrow[i1].h5050
/ last_h5050_internal) > .1)
dst_add_gap_src_pixels("Col/Page break", masterinfo,
colgap_pixels);
last_h5050_internal = -1;
}
gap = beginning_gap_internal;
beginning_gap_internal = -1;
}
dst_add_gap_src_pixels("Vert break", masterinfo, gap);
} else {
if (regcount == 0 && beginning_gap_internal < 0)
beginning_gap_internal = colgap_pixels;
}
bmpregion_add(bregion, breakinfo, masterinfo, allow_text_wrapping,
trim_flags, allow_vertical_breaks, force_scale,
justification_flags, caller_id, colcount, rowcount,
pageinfo, marking_flags, rbdelta);
regcount++;
i1 = i2 + 1;
}
}
ncols_last = ncols;
if (revert)
restore_output_dpi();
breakinfo_free(102, breakinfo);
}
/*
**
** MAIN BITMAP REGION ADDING FUNCTION
**
** NOTE: This function calls itself recursively!
**
** Input: A generic rectangular region from the source file. It will not
** be checked for multiple columns, but the text may be wrapped
** (controlled by allow_text_wrapping input).
**
** First, excess margins are trimmed off of the region.
**
** Then, if the resulting trimmed region is wider than the max desirable width
** and allow_text_wrapping is non-zero, then the
** bmpregion_analyze_justification_and_line_spacing() function is called.
** Otherwise the region is scaled to fit and added to the master set of pages.
**
** justification_flags
** Bits 6-7: 0 = document is not fully justified
** 1 = document is fully justified
** 2 = don't know document justification yet
** Bits 4-5: 0 = Use user settings
** 1 = fully justify
** 2 = do not fully justify
** Bits 2-3: 0 = document is left justified
** 1 = document is centered
** 2 = document is right justified
** 3 = don't know document justification yet
** Bits 0-1: 0 = left justify document
** 1 = center document
** 2 = right justify document
** 3 = Use user settings
**
** force_scale = -2.0 : Fit column width to display width
** force_scale = -1.0 : Use output dpi unless the region doesn't fit.
** In that case, scale it down until it fits.
** force_scale > 0.0 : Scale region by force_scale.
**
** mark_flags & 1 : Mark top
** mark_flags & 2 : Mark bottom
** mark_flags & 4 : Mark left
** mark_flags & 8 : Mark right
**
** trim_flags & 0x80 : Do NOT re-trim no matter what.
**
*/
static void bmpregion_add(BMPREGION *region, BREAKINFO *breakinfo,
MASTERINFO *masterinfo, int allow_text_wrapping, int trim_flags,
int allow_vertical_breaks, double force_scale, int justification_flags,
int caller_id, int *colcount, int *rowcount, PAGEINFO *pageinfo,
int mark_flags, int rowbase_delta)
{
int w, wmax, i, nc, nr, h, bpp, tall_region;
double region_width_inches;
WILLUSBITMAP *bmp, _bmp;
BMPREGION *newregion, _newregion;
newregion = &_newregion;
(*newregion) = (*region);
#if (WILLUSDEBUGX & 1)
printf("@bmpregion_add (%d,%d) - (%d,%d)\n",region->c1,region->r1,region->c2,region->r2);
printf(" trimflags = %X\n",trim_flags);
#endif
if (debug) {
if (!allow_text_wrapping)
printf("@bmpregion_add (no break) (%d,%d) - (%d,%d) (scale=%g)\n",
region->c1, region->r1, region->c2, region->r2,
force_scale);
else
printf(
"@bmpregion_add (allow break) (%d,%d) - (%d,%d) (scale=%g)\n",
region->c1, region->r1, region->c2, region->r2,
force_scale);
}
/*
** Tag blank rows and columns and trim the blank margins off
** trimflags = 0xf for all margin trim.
** trimflags = 0xc for just top and bottom margins.
*/
bmpregion_trim_margins(newregion, colcount, rowcount, trim_flags);
#if (WILLUSDEBUGX & 1)
printf(" After trim: (%d,%d) - (%d,%d)\n",newregion->c1,newregion->r1,newregion->c2,newregion->r2);
#endif
nc = newregion->c2 - newregion->c1 + 1;
nr = newregion->r2 - newregion->r1 + 1;
// printf("nc=%d, nr=%d\n",nc,nr);
if (verbose) {
printf(" row range adjusted to %d - %d\n", newregion->r1,
newregion->r2);
printf(" col range adjusted to %d - %d\n", newregion->c1,
newregion->c2);
}
if (nc <= 5 || nr <= 1)
return;
region_width_inches = (double) nc / src_dpi;
// printf("regwidth = %g in\n",region_width_inches);
/* Use untrimmed region left/right if possible */
if (caller_id == 1 && region_width_inches <= max_region_width_inches) {
int trimleft, trimright;
int maxpix, dpix;
maxpix = (int) (max_region_width_inches * src_dpi + .5);
#if (WILLUSDEBUGX & 1)
printf(" Trimming. C's = %4d %4d %4d %4d\n",region->c1,newregion->c1,newregion->c2,region->c2);
printf(" maxpix = %d, regwidth = %d\n",maxpix,region->c2-region->c1+1);
#endif
if (maxpix > (region->c2 - region->c1 + 1))
maxpix = region->c2 - region->c1 + 1;
// printf(" maxpix = %d\n",maxpix);
dpix = (region->c2 - region->c1 + 1 - maxpix) / 2;
// printf(" dpix = %d\n",dpix);
trimright = region->c2 - newregion->c2;
trimleft = newregion->c1 - region->c1;
if (trimleft < trimright) {
if (trimleft > dpix)
newregion->c1 = region->c1 + dpix;
newregion->c2 = newregion->c1 + maxpix - 1;
} else {
if (trimright > dpix)
newregion->c2 = region->c2 - dpix;
newregion->c1 = newregion->c2 - maxpix + 1;
}
if (newregion->c1 < region->c1)
newregion->c1 = region->c1;
if (newregion->c2 > region->c2)
newregion->c2 = region->c2;
nc = newregion->c2 - newregion->c1 + 1;
#if (WILLUSDEBUGX & 1)
printf(" Post Trim. C's = %4d %4d %4d %4d\n",region->c1,newregion->c1,newregion->c2,region->c2);
#endif
region_width_inches = (double) nc / src_dpi;
}
/*
** Try breaking the region into smaller horizontal pieces (wrap text lines)
*/
/*
printf("allow_text_wrapping=%d, region_width_inches=%g, max_region_width_inches=%g\n",
allow_text_wrapping,region_width_inches,max_region_width_inches);
*/
/* New in v1.50, if allow_text_wrapping==2, unwrap short lines. */
if (allow_text_wrapping == 2
|| (allow_text_wrapping == 1
&& region_width_inches > max_region_width_inches)) {
bmpregion_analyze_justification_and_line_spacing(newregion, breakinfo,
masterinfo, colcount, rowcount, pageinfo, 1, force_scale);
return;
}
/*
** If allowed, re-submit each vertical region individually
*/
if (allow_vertical_breaks) {
bmpregion_analyze_justification_and_line_spacing(newregion, breakinfo,
masterinfo, colcount, rowcount, pageinfo, 0, force_scale);
return;
}
/* AT THIS POINT, BITMAP IS NOT TO BE BROKEN UP HORIZONTALLY OR VERTICALLY */
/* (IT CAN STILL BE FULLY JUSTIFIED IF ALLOWED.) */
/*
** Scale region to fit the destination device width and add to the master bitmap.
**
**
** Start by copying source region to new bitmap
**
*/
// printf("c1=%d\n",newregion->c1);
/* Is it a figure? */
tall_region = (double) (newregion->r2 - newregion->r1 + 1) / src_dpi
>= dst_min_figure_height_in;
/* Re-trim left and right? */
if ((trim_flags & 0x80) == 0) {
/* If tall region and figure justification turned on ... */
if ((tall_region && dst_figure_justify >= 0)
/* ... or if centered region ... */
|| ((trim_flags & 3) != 3
&& ((justification_flags & 3) == 1
|| ((justification_flags & 3) == 3
&& (dst_justify == 1
|| (dst_justify < 0
&& (justification_flags
& 0xc) == 4)))))) {
bmpregion_trim_margins(newregion, colcount, rowcount, 0x3);
nc = newregion->c2 - newregion->c1 + 1;
region_width_inches = (double) nc / src_dpi;
}
}
#if (WILLUSDEBUGX & 1)
aprintf("atomic region: " ANSI_CYAN "%.2f x %.2f in" ANSI_NORMAL " c1=%d, (%d x %d) (rbdel=%d) just=0x%02X\n",
(double)(newregion->c2-newregion->c1+1)/src_dpi,
(double)(newregion->r2-newregion->r1+1)/src_dpi,
newregion->c1,
(newregion->c2-newregion->c1+1),
(newregion->r2-newregion->r1+1),
rowbase_delta,justification_flags);
#endif
/* Copy atomic region into bmp */
bmp = &_bmp;
bmp_init(bmp);
bmp->width = nc;
bmp->height = nr;
if (dst_color)
bmp->bpp = 24;
else {
bmp->bpp = 8;
for (i = 0; i < 256; i++)
bmp->red[i] = bmp->blue[i] = bmp->green[i] = i;
}
bmp_alloc(bmp);
bpp = dst_color ? 3 : 1;
// printf("r1=%d, r2=%d\n",newregion->r1,newregion->r2);
for (i = newregion->r1; i <= newregion->r2; i++) {
unsigned char *psrc, *pdst;
pdst = bmp_rowptr_from_top(bmp, i - newregion->r1);
psrc = bmp_rowptr_from_top(dst_color ? newregion->bmp : newregion->bmp8,
i) + bpp * newregion->c1;
memcpy(pdst, psrc, nc * bpp);
}
/*
** Now scale to appropriate destination size.
**
** force_scale is used to maintain uniform scaling so that
** most of the regions are scaled at the same value.
**
** force_scale = -2.0 : Fit column width to display width
** force_scale = -1.0 : Use output dpi unless the region doesn't fit.
** In that case, scale it down until it fits.
** force_scale > 0.0 : Scale region by force_scale.
**
*/
/* Max viewable pixel width on device screen */
wmax = (int) (masterinfo->bmp.width - (dst_marleft + dst_marright) * dst_dpi
+ 0.5);
if (force_scale > 0.)
w = (int) (force_scale * bmp->width + 0.5);
else {
if (region_width_inches < max_region_width_inches)
w = (int) (region_width_inches * dst_dpi + .5);
else
w = wmax;
}
/* Special processing for tall regions (likely figures) */
if (tall_region && w < wmax && dst_fit_to_page != 0) {
if (dst_fit_to_page < 0)
w = wmax;
else {
w = (int) (w * (1. + (double) dst_fit_to_page / 100.) + 0.5);
if (w > wmax)
w = wmax;
}
}
h = (int) (((double) w / bmp->width) * bmp->height + .5);
/*
** If scaled dimensions are finite, add to master bitmap.
*/
if (w > 0 && h > 0) {
WILLUSBITMAP *tmp, _tmp;
int nocr;
last_scale_factor_internal = (double) w / bmp->width;
#ifdef HAVE_OCR
if (dst_ocr)
{
nocr=(int)((double)bmp->width/w+0.5);
if (nocr < 1)
nocr=1;
if (nocr > 10)
nocr=10;
w *= nocr;
h *= nocr;
}
else
#endif
nocr = 1;
tmp = &_tmp;
bmp_init(tmp);
bmp_resample(tmp, bmp, (double) 0., (double) 0., (double) bmp->width,
(double) bmp->height, w, h);
bmp_free(bmp);
/*
{
static int nn=0;
char filename[256];
sprintf(filename,"xxx%02d.png",nn++);
bmp_write(tmp,filename,stdout,100);
}
*/
/*
** Add scaled bitmap to destination.
*/
/* Allocate more rows if necessary */
while (masterinfo->rows + tmp->height / nocr > masterinfo->bmp.height)
bmp_more_rows(&masterinfo->bmp, 1.4, 255);
/* Check special justification for tall regions */
if (tall_region && dst_figure_justify >= 0)
justification_flags = dst_figure_justify;
bmp_src_to_dst(masterinfo, tmp, justification_flags, region->bgcolor,
nocr, (int) ((double) src_dpi * tmp->width / bmp->width + .5));
bmp_free(tmp);
}
/* Store delta to base of text row (used by wrapbmp_flush()) */
last_rowbase_internal = rowbase_delta;
/* .05 was .072 in v1.35 */
/* dst_add_gap(&masterinfo->bmp,&masterinfo->rows,0.05); */
/*
if (revert)
restore_output_dpi();
*/
}
static void dst_add_gap_src_pixels(char *caller, MASTERINFO *masterinfo,
int pixels)
{
double gap_inches;
/*
aprintf("%s " ANSI_GREEN "dst_add" ANSI_NORMAL " %.3f in (%d pix)\n",caller,(double)pixels/src_dpi,pixels);
*/
if (last_scale_factor_internal < 0.)
gap_inches = (double) pixels / src_dpi;
else
gap_inches = (double) pixels * last_scale_factor_internal / dst_dpi;
gap_inches *= vertical_multiplier;
if (gap_inches > max_vertical_gap_inches)
gap_inches = max_vertical_gap_inches;
dst_add_gap(masterinfo, gap_inches);
}
static void dst_add_gap(MASTERINFO *masterinfo, double inches)
{
int n, bw;
unsigned char *p;
n = (int) (inches * dst_dpi + .5);
if (n < 1)
n = 1;
while (masterinfo->rows + n > masterinfo->bmp.height)
bmp_more_rows(&masterinfo->bmp, 1.4, 255);
bw = bmp_bytewidth(&masterinfo->bmp) * n;
p = bmp_rowptr_from_top(&masterinfo->bmp, masterinfo->rows);
memset(p, 255, bw);
masterinfo->rows += n;
}
/*
**
** Add already-scaled source bmp to destination bmp.
** Source bmp may be narrower than destination--if so, it may be fully justifed.
** dst = destination bitmap
** src = source bitmap
** dst and src bpp must match!
** All rows of src are applied to masterinfo->bmp starting at row masterinfo->rows
** Full justification is done if requested.
**
*/
static void bmp_src_to_dst(MASTERINFO *masterinfo, WILLUSBITMAP *src,
int justification_flags, int whitethresh, int nocr, int dpi)
{
WILLUSBITMAP *src1, _src1;
WILLUSBITMAP *tmp;
#ifdef HAVE_OCR
WILLUSBITMAP _tmp;
OCRWORDS _words,*words;
#endif
int dw, dw2;
int i, srcbytespp, srcbytewidth, go_full;
int destwidth, destx0, just;
if (src->width <= 0 || src->height <= 0)
return;
/*
printf("@bmp_src_to_dst. dst->bpp=%d, src->bpp=%d, src=%d x %d\n",masterinfo->bmp.bpp,src->bpp,src->width,src->height);
*/
/*
{
static int count=0;
static char filename[256];
printf(" @bmp_src_to_dst...\n");
sprintf(filename,"src%05d.png",count++);
bmp_write(src,filename,stdout,100);
}
*/
/*
if (fulljust && dst_fulljustify)
printf("srcbytespp=%d, srcbytewidth=%d, destwidth=%d, destx0=%d, destbytewidth=%d\n",
srcbytespp,srcbytewidth,destwidth,destx0,dstbytewidth);
*/
/* Determine what justification to use */
/* Left? */
if ((justification_flags & 3) == 0 /* Mandatory left just */
|| ((justification_flags & 3) == 3 /* Use user settings */
&& (dst_justify == 0
|| (dst_justify < 0
&& (justification_flags & 0xc) == 0))))
just = 0;
else if ((justification_flags & 3) == 2
|| ((justification_flags & 3) == 3
&& (dst_justify == 2
|| (dst_justify < 0
&& (justification_flags & 0xc) == 8))))
just = 2;
else
just = 1;
/* Full justification? */
destwidth = (int) (masterinfo->bmp.width
- (dst_marleft + dst_marright) * dst_dpi + .5);
go_full = (destwidth * nocr > src->width
&& (((justification_flags & 0x30) == 0x10)
|| ((justification_flags & 0x30) == 0 // Use user settings
&& (dst_fulljustify == 1
|| (dst_fulljustify < 0
&& (justification_flags & 0xc0)
== 0x40)))));
/* Put fully justified text into src1 bitmap */
if (go_full) {
src1 = &_src1;
bmp_init(src1);
bmp_fully_justify(src1, src, nocr * destwidth, whitethresh, just);
} else
src1 = src;
#if (WILLUSDEBUGX & 1)
printf("@bmp_src_to_dst: jflags=0x%02X just=%d, go_full=%d\n",justification_flags,just,go_full);
printf(" destx0=%d, destwidth=%d, src->width=%d\n",destx0,destwidth,src->width);
#endif
#ifdef HAVE_OCR
if (dst_ocr)
{
/* Run OCR on the bitmap */
words=&_words;
ocrwords_init(words);
ocrwords_fill_in(words,src1,whitethresh,dpi);
/* Scale bitmap and word positions to destination size */
if (nocr>1)
{
tmp=&_tmp;
bmp_init(tmp);
bmp_integer_resample(tmp,src1,nocr);
ocrwords_int_scale(words,nocr);
}
else
tmp=src1;
}
else
#endif
tmp = src1;
/*
printf("writing...\n");
ocrwords_box(words,tmp);
bmp_write(tmp,"out.png",stdout,100);
exit(10);
*/
destx0 = (int) (dst_marleft * dst_dpi + .5);
if (just == 0)
dw = destx0;
else if (just == 1)
dw = destx0 + (destwidth - tmp->width) / 2;
else
dw = destx0 + destwidth - tmp->width;
if (dw < 0)
dw = 0;
/* Add OCR words to destination list */
#ifdef HAVE_OCR
if (dst_ocr)
{
ocrwords_offset(words,dw,masterinfo->rows);
ocrwords_concatenate(dst_ocrwords,words);
ocrwords_free(words);
}
#endif
/* Add tmp bitmap to dst */
srcbytespp = tmp->bpp == 24 ? 3 : 1;
srcbytewidth = tmp->width * srcbytespp;
dw2 = masterinfo->bmp.width - tmp->width - dw;
dw *= srcbytespp;
dw2 *= srcbytespp;
for (i = 0; i < tmp->height; i++, masterinfo->rows++) {
unsigned char *pdst, *psrc;
psrc = bmp_rowptr_from_top(tmp, i);
pdst = bmp_rowptr_from_top(&masterinfo->bmp, masterinfo->rows);
memset(pdst, 255, dw);
pdst += dw;
memcpy(pdst, psrc, srcbytewidth);
pdst += srcbytewidth;
memset(pdst, 255, dw2);
}
#ifdef HAVE_OCR
if (dst_ocr && nocr>1)
bmp_free(tmp);
#endif
if (go_full)
bmp_free(src1);
}
/*
** Spread words out in src and put into jbmp at scaling nocr
** In case the text can't be expanded enough,
** just=0 (left justify), 1 (center), 2 (right justify)
*/
static void bmp_fully_justify(WILLUSBITMAP *jbmp, WILLUSBITMAP *src,
int jbmpwidth, int whitethresh, int just)
{
BMPREGION srcregion;
BREAKINFO *colbreaks, _colbreaks;
WILLUSBITMAP gray;
int *gappos, *gapsize;
int i, srcbytespp, srcbytewidth, jbmpbytewidth, newwidth, destx0, ng;
static char *funcname = "bmp_fully_justify";
/*
{
char filename[256];
count++;
sprintf(filename,"out%03d.png",count);
bmp_write(src,filename,stdout,100);
}
*/
/* Init/allocate destination bitmap */
jbmp->width = jbmpwidth;
jbmp->height = src->height;
jbmp->bpp = src->bpp;
if (jbmp->bpp == 8)
for (i = 0; i < 256; i++)
jbmp->red[i] = jbmp->green[i] = jbmp->blue[i] = i;
bmp_alloc(jbmp);
/* Find breaks in the text row */
colbreaks = &_colbreaks;
colbreaks->textrow = NULL;
srcregion.bgcolor = whitethresh;
srcregion.c1 = 0;
srcregion.c2 = src->width - 1;
srcregion.r1 = 0;
srcregion.r2 = src->height - 1;
srcbytespp = src->bpp == 24 ? 3 : 1;
if (srcbytespp == 3) {
srcregion.bmp = src;
srcregion.bmp8 = &gray;
bmp_init(srcregion.bmp8);
bmp_convert_to_greyscale_ex(srcregion.bmp8, src);
} else {
srcregion.bmp = src;
srcregion.bmp8 = src;
}
breakinfo_alloc(103, colbreaks, src->width);
{
int *colcount, *rowcount;
colcount = rowcount = NULL;
willus_dmem_alloc_warn(8, (void **) &colcount,
sizeof(int) * (src->width + src->height), funcname, 10);
rowcount = &colcount[src->width];
bmpregion_one_row_find_breaks(&srcregion, colbreaks, colcount, rowcount,
1);
willus_dmem_free(8, (double **) &colcount, funcname);
}
if (srcbytespp == 3)
bmp_free(srcregion.bmp8);
ng = colbreaks->n - 1;
gappos = NULL;
if (ng > 0) {
int maxsize, ms2, mingap, j;
willus_dmem_alloc_warn(9, (void **) &gappos, (2 * sizeof(int)) * ng,
funcname, 10);
gapsize = &gappos[ng];
for (i = 0; i < ng; i++) {
gappos[i] = colbreaks->textrow[i].c2 + 1;
gapsize[i] = colbreaks->textrow[i].gap;
}
/* Take only the largest group of gaps */
for (maxsize = i = 0; i < ng; i++)
if (maxsize < gapsize[i])
maxsize = gapsize[i];
mingap = srcregion.lcheight * word_spacing;
if (mingap < 2)
mingap = 2;
if (maxsize > mingap)
maxsize = mingap;
ms2 = maxsize / 2;
for (i = j = 0; i < ng; i++)
if (gapsize[i] > ms2) {
if (j != i) {
gapsize[j] = gapsize[i];
gappos[j] = gappos[i];
}
j++;
}
ng = j;
/* Figure out total pixel expansion */
newwidth = src->width * 1.25;
if (newwidth > jbmp->width)
newwidth = jbmp->width;
} else
newwidth = src->width;
breakinfo_free(103, colbreaks);
/* Starting column in destination bitmap */
if (just == 1)
destx0 = (jbmp->width - newwidth) / 2;
else if (just == 2)
destx0 = (jbmp->width - newwidth);
else
destx0 = 0;
jbmpbytewidth = bmp_bytewidth(jbmp);
srcbytewidth = bmp_bytewidth(src);
/* Clear entire fully justified bitmap */
memset(bmp_rowptr_from_top(jbmp, 0), 255, jbmpbytewidth * jbmp->height);
/* Spread out source pieces to fully justify them */
for (i = 0; i <= ng; i++) {
int j, dx0, dx, sx0;
unsigned char *pdst, *psrc;
dx = i < ng ?
(i > 0 ? gappos[i] - gappos[i - 1] : gappos[i] + 1) :
(i > 0 ? src->width - (gappos[i - 1] + 1) : src->width);
dx *= srcbytespp;
sx0 = i == 0 ? 0 : (gappos[i - 1] + 1);
dx0 = destx0 + sx0 + (i == 0 ? 0 : (newwidth - src->width) * i / ng);
psrc = bmp_rowptr_from_top(src, 0) + sx0 * srcbytespp;
pdst = bmp_rowptr_from_top(jbmp, 0) + dx0 * srcbytespp;
for (j = 0; j < src->height; j++, pdst += jbmpbytewidth, psrc +=
srcbytewidth)
memcpy(pdst, psrc, dx);
}
if (gappos != NULL)
willus_dmem_free(9, (double **) &gappos, funcname);
}
/*
** flags&1 : trim c1
** flags&2 : trim c2
** flags&4 : trim r1
** flags&8 : trim r2
** flags&16 : Find rowbase, font size, etc.
**
** Row base is where row dist crosses 50% on r2 side.
** Font size is where row dist crosses 5% on other side (r1 side).
** Lowercase font size is where row dist crosses 50% on r1 side.
**
** For 12 pt font:
** Single spacing is 14.66 pts (Calibri), 13.82 pts (Times), 13.81 pts (Arial)
** Size of cap letter is 7.7 pts (Calibri), 8.1 pts (Times), 8.7 pts (Arial)
** Size of small letter is 5.7 pts (Calibri), 5.6 pts (Times), 6.5 pts (Arial)
** Mean line spacing = 1.15 - 1.22 (~1.16)
** Mean cap height = 0.68
** Mean small letter height = 0.49
**
*/
static void bmpregion_trim_margins(BMPREGION *region, int *colcount0,
int *rowcount0, int flags)
{
int i, j, n; /* ,r1,r2,dr1,dr2,dr,vtrim,vspace; */
int *colcount, *rowcount;
static char *funcname = "bmpregion_trim_margins";
/* To detect a hyphen, we need to trim and calc text base row */
if (flags & 32)
flags |= 0x1f;
if (colcount0 == NULL)
willus_dmem_alloc_warn(10, (void **) &colcount,
sizeof(int) * (region->c2 + 1), funcname, 10);
else
colcount = colcount0;
if (rowcount0 == NULL)
willus_dmem_alloc_warn(11, (void **) &rowcount,
sizeof(int) * (region->r2 + 1), funcname, 10);
else
rowcount = rowcount0;
n = region->c2 - region->c1 + 1;
/*
printf("Trim: reg=(%d,%d) - (%d,%d)\n",region->c1,region->r1,region->c2,region->r2);
if (region->c2+1 > cca || region->r2+1 > rca)
{
printf("A ha 0!\n");
exit(10);
}
*/
memset(colcount, 0, (region->c2 + 1) * sizeof(int));
memset(rowcount, 0, (region->r2 + 1) * sizeof(int));
for (j = region->r1; j <= region->r2; j++) {
unsigned char *p;
p = bmp_rowptr_from_top(region->bmp8, j) + region->c1;
for (i = 0; i < n; i++, p++)
if (p[0] < region->bgcolor) {
rowcount[j]++;
colcount[i + region->c1]++;
}
}
/*
** Trim excess margins
*/
if (flags & 1)
trim_to(colcount, &region->c1, region->c2,
src_left_to_right ? 2.0 : 4.0);
if (flags & 2)
trim_to(colcount, &region->c2, region->c1,
src_left_to_right ? 4.0 : 2.0);
if (colcount0 == NULL)
willus_dmem_free(10, (double **) &colcount, funcname);
if (flags & 4)
trim_to(rowcount, &region->r1, region->r2, 4.0);
if (flags & 8)
trim_to(rowcount, &region->r2, region->r1, 4.0);
if (flags & 16) {
int maxcount, mc2, h2;
double f;
maxcount = 0;
for (i = region->r1; i <= region->r2; i++)
if (rowcount[i] > maxcount)
maxcount = rowcount[i];
mc2 = maxcount / 2;
for (i = region->r2; i >= region->r1; i--)
if (rowcount[i] > mc2)
break;
region->rowbase = i;
for (i = region->r1; i <= region->r2; i++)
if (rowcount[i] > mc2)
break;
region->h5050 = region->lcheight = region->rowbase - i + 1;
mc2 = maxcount / 20;
for (i = region->r1; i <= region->r2; i++)
if (rowcount[i] > mc2)
break;
region->capheight = region->rowbase - i + 1;
/*
** Sanity check capheight and lcheight
*/
h2 = height2_calc(&rowcount[region->r1], region->r2 - region->r1 + 1);
#if (WILLUSDEBUGX & 8)
if (region->c2-region->c1 > 1500)
printf("reg %d x %d (%d,%d) - (%d,%d) h2=%d ch/h2=%g\n",region->c2-region->c1+1,region->r2-region->r1+1,region->c1,region->r1,region->c2,region->r2,h2,(double)region->capheight/h2);
#endif
if (region->capheight < h2 * 0.75)
region->capheight = h2;
f = (double) region->lcheight / region->capheight;
if (f < 0.55)
region->lcheight = (int) (0.72 * region->capheight + .5);
else if (f > 0.85)
region->lcheight = (int) (0.72 * region->capheight + .5);
#if (WILLUSDEBUGX & 8)
if (region->c2-region->c1 > 1500)
printf(" lcheight final = %d\n",region->lcheight);
#endif
#if (WILLUSDEBUGX & 10)
if (region->c2-region->c1 > 1500 && region->r2-region->r1 < 100)
{
static int append=0;
FILE *f;
int i;
f=fopen("textrows.ep",append==0?"w":"a");
append=1;
for (i=region->r1;i<=region->r2;i++)
fprintf(f,"%d %g\n",region->rowbase-i,(double)rowcount[i]/maxcount);
fprintf(f,"//nc\n");
fclose(f);
}
#endif
} else {
region->h5050 = region->r2 - region->r1 + 1;
region->capheight = 0.68 * (region->r2 - region->r1 + 1);
region->lcheight = 0.5 * (region->r2 - region->r1 + 1);
region->rowbase = region->r2;
}
#if (WILLUSDEBUGX & 2)
printf("trim:\n reg->c1=%d, reg->c2=%d\n",region->c1,region->c2);
printf(" reg->r1=%d, reg->r2=%d, reg->rowbase=%d\n\n",region->r1,region->r2,region->rowbase);
#endif
if (rowcount0 == NULL)
willus_dmem_free(11, (double **) &rowcount, funcname);
}
/*
** Does region end in a hyphen? If so, fill in HYPHENINFO structure.
*/
static void bmpregion_hyphen_detect(BMPREGION *region)
{
int i, j; /* ,r1,r2,dr1,dr2,dr,vtrim,vspace; */
int width;
int *r0, *r1, *r2, *r3;
int rmin, rmax, rowbytes, nrmid, rsum;
int cstart, cend, cdir;
unsigned char *p;
static char *funcname = "bmpregion_hyphen_detect";
#if (WILLUSDEBUGX & 16)
static int count=0;
char pngfile[256];
FILE *out;
count++;
printf("@bmpregion_hyphen_detect count=%d\n",count);
sprintf(pngfile,"word%04d.png",count);
bmpregion_write(region,pngfile);
sprintf(pngfile,"word%04d.txt",count);
out=fopen(pngfile,"w");
fprintf(out,"c1=%d, c2=%d, r1=%d, r2=%d\n",region->c1,region->c2,region->r1,region->r2);
fprintf(out,"lcheight=%d\n",region->lcheight);
#endif
region->hyphen.ch = -1;
region->hyphen.c2 = -1;
if (!k2_hyphen_detect)
return;
width = region->c2 - region->c1 + 1;
if (width < 2)
return;
willus_dmem_alloc_warn(27, (void **) &r0, sizeof(int) * 4 * width, funcname,
10);
r1 = &r0[width];
r2 = &r1[width];
r3 = &r2[width];
for (i = 0; i < width; i++)
r0[i] = r1[i] = r2[i] = r3[i] = -1;
rmin = region->rowbase - region->capheight - region->lcheight * .04;
if (rmin < region->r1)
rmin = region->r1;
rmax = region->rowbase + region->lcheight * .04;
if (rmax > region->r2)
rmax = region->r2;
rowbytes = bmp_bytewidth(region->bmp8);
p = bmp_rowptr_from_top(region->bmp8, 0);
nrmid = rsum = 0;
if (src_left_to_right) {
cstart = region->c2;
cend = region->c1 - 1;
cdir = -1;
} else {
cstart = region->c1;
cend = region->c2 + 1;
cdir = 1;
}
#if (WILLUSDEBUGX & 16)
fprintf(out," j r0 r1 r2 r3\n");
#endif
for (j = cstart; j != cend; j += cdir) {
int r, rmid, dr, drmax;
// printf("j=%d\n",j);
rmid = (rmin + rmax) / 2;
// printf(" rmid=%d\n",rmid);
drmax = region->r2 + 1 - rmid > rmid - region->r1 + 1 ?
region->r2 + 1 - rmid : rmid - region->r1 + 1;
/* Find dark region closest to center line */
for (dr = 0; dr < drmax; dr++) {
if (rmid + dr <= region->r2
&& p[(rmid + dr) * rowbytes + j] < region->bgcolor)
break;
if (rmid - dr >= region->r1
&& p[(rmid - dr) * rowbytes + j] < region->bgcolor) {
dr = -dr;
break;
}
}
#if (WILLUSDEBUGX & 16)
fprintf(out," dr=%d/%d, rmid+dr=%d, rmin=%d, rmax=%d, nrmid=%d\n",dr,drmax,rmid+dr,rmin,rmax,nrmid);
#endif
/* No dark detected or mark is outside hyphen region? */
/* Termination criterion #1 */
if (dr >= drmax
|| (nrmid > 2 && (double) nrmid / region->lcheight > .1
&& (rmid + dr < rmin || rmid + dr > rmax))) {
if (region->hyphen.ch >= 0 && dr >= drmax)
continue;
if (nrmid > 2 && (double) nrmid / region->lcheight > .35) {
region->hyphen.ch = j - cdir;
region->hyphen.r1 = rmin;
region->hyphen.r2 = rmax;
}
if (dr < drmax) {
region->hyphen.c2 = j;
break;
}
continue;
}
if (region->hyphen.ch >= 0) {
region->hyphen.c2 = j;
break;
}
nrmid++;
rmid += dr;
/* Dark spot is outside expected hyphen area */
/*
if (rmid<rmin || rmid>rmax)
{
if (nrmid>0)
break;
continue;
}
*/
for (r = rmid; r >= region->r1; r--)
if (p[r * rowbytes + j] >= region->bgcolor)
break;
r1[j - region->c1] = r + 1;
r0[j - region->c1] = -1;
if (r >= region->r1) {
for (; r >= region->r1; r--)
if (p[r * rowbytes + j] < region->bgcolor)
break;
if (r >= region->r1)
r0[j - region->c1] = r;
}
for (r = rmid; r <= region->r2; r++)
if (p[r * rowbytes + j] >= region->bgcolor)
break;
r2[j - region->c1] = r - 1;
r3[j - region->c1] = -1;
if (r <= region->r2) {
for (; r <= region->r2; r++)
if (p[r * rowbytes + j] < region->bgcolor)
break;
if (r <= region->r2)
r3[j - region->c1] = r;
}
#if (WILLUSDEBUGX & 16)
fprintf(out," %4d %4d %4d %4d %4d\n",j,r0[j-region->c1],r1[j-region->c1],r2[j-region->c1],r3[j-region->c1]);
#endif
if (region->hyphen.c2 < 0
&& (r0[j - region->c1] >= 0 || r3[j - region->c1] >= 0))
region->hyphen.c2 = j;
/* Termination criterion #2 */
if (nrmid > 2 && (double) nrmid / region->lcheight > .35
&& (r1[j - region->c1] > rmax || r2[j - region->c1] < rmin)) {
region->hyphen.ch = j - cdir;
region->hyphen.r1 = rmin;
region->hyphen.r2 = rmax;
if (region->hyphen.c2 < 0)
region->hyphen.c2 = j;
break;
}
// rc=(r1[j-region->c1]+r2[j-region->c1])/2;
/* DQ possible hyphen if r1/r2 out of range */
if (nrmid > 1) {
/* Too far away from last values? */
if ((double) (rmin - r1[j - region->c1]) / region->lcheight > .1
|| (double) (r2[j - region->c1] - rmax) / region->lcheight
> .1)
break;
if ((double) nrmid / region->lcheight > .1 && nrmid > 1) {
if ((double) fabs(rmin - r1[j - region->c1]) / region->lcheight
> .1
|| (double) (rmax - r2[j - region->c1])
/ region->lcheight > .1)
break;
}
}
if (nrmid == 1 || r1[j - region->c1] < rmin)
rmin = r1[j - region->c1];
if (nrmid == 1 || r2[j - region->c1] > rmax)
rmax = r2[j - region->c1];
if ((double) nrmid / region->lcheight > .1 && nrmid > 1) {
double rmean;
/* Can't be too thick */
if ((double) (rmax - rmin) / region->lcheight > .55
|| (double) (rmax - rmin) / region->lcheight < .08)
break;
/* Must be reasonably well centered above baseline */
rmean = (double) (rmax + rmin) / 2;
if ((double) (region->rowbase - rmean) / region->lcheight < 0.35
|| (double) (region->rowbase - rmean) / region->lcheight
> 0.85)
break;
if ((double) (region->rowbase - rmax) / region->lcheight < 0.2
|| (double) (region->rowbase - rmin) / region->lcheight
> 0.92)
break;
}
}
#if (WILLUSDEBUGX & 16)
fprintf(out," ch=%d, c2=%d, r1=%d, r2=%d\n",region->hyphen.ch,region->hyphen.c2,region->hyphen.r1,region->hyphen.r2);
fclose(out);
#endif
/* More sanity checks--better to miss a hyphen than falsely detect it. */
if (region->hyphen.ch >= 0) {
double ar;
/* If it's only a hyphen, then it's probably actually a dash--don't detect it. */
if (region->hyphen.c2 < 0)
region->hyphen.ch = -1;
/* Check aspect ratio */
ar = (double) (region->hyphen.r2 - region->hyphen.r1) / nrmid;
if (ar < 0.08 || ar > 0.75)
region->hyphen.ch = -1;
}
willus_dmem_free(27, (double **) &r0, funcname);
#if (WILLUSDEBUGX & 16)
if (region->hyphen.ch>=0)
printf("\n\n GOT HYPHEN.\n\n");
printf(" Exiting bmpregion_hyphen_detect\n");
#endif
}
#if (defined(WILLUSDEBUGX) || defined(WILLUSDEBUG))
static void bmpregion_write(BMPREGION *region,char *filename)
{
int i,bpp;
WILLUSBITMAP *bmp,_bmp;
bmp=&_bmp;
bmp_init(bmp);
bmp->width=region->c2-region->c1+1;
bmp->height=region->r2-region->r1+1;
bmp->bpp=region->bmp->bpp;
bpp=bmp->bpp==8?1:3;
bmp_alloc(bmp);
for (i=0;i<256;i++)
bmp->red[i]=bmp->green[i]=bmp->blue[i]=i;
for (i=0;i<bmp->height;i++)
{
unsigned char *s,*d;
s=bmp_rowptr_from_top(region->bmp,region->r1+i)+region->c1*bpp;
d=bmp_rowptr_from_top(bmp,i);
memcpy(d,s,bmp->width*bpp);
}
bmp_write(bmp,filename,stdout,97);
bmp_free(bmp);
}
#endif
#if (WILLUSDEBUGX & 6)
static void breakinfo_echo(BREAKINFO *breakinfo)
{
int i;
printf("@breakinfo_echo...\n");
for (i=0;i<breakinfo->n;i++)
printf(" %2d. r1=%4d, rowbase=%4d, r2=%4d, c1=%4d, c2=%4d\n",
i+1,breakinfo->textrow[i].r1,
breakinfo->textrow[i].rowbase,
breakinfo->textrow[i].r2,
breakinfo->textrow[i].c1,
breakinfo->textrow[i].c2);
}
#endif
/*
** Calculate weighted height of a rectangular region.
** This weighted height is intended to be close to the height of
** a capital letter, or the height of the majority of the region.
**
*/
static int height2_calc(int *rc, int n)
{
int i, thresh, i1, h2;
int *c;
static char *funcname = "height2_calc";
#if (WILLUSDEBUGX & 8)
int cmax;
#endif
if (n <= 0)
return (1);
willus_dmem_alloc_warn(12, (void **) &c, sizeof(int) * n, funcname, 10);
memcpy(c, rc, n * sizeof(int));
sorti(c, n);
#if (WILLUSDEBUGX & 8)
cmax=c[n-1];
#endif
for (i = 0; i < n - 1 && c[i] == 0; i++)
;
thresh = c[(i + n) / 3];
willus_dmem_free(12, (double **) &c, funcname);
for (i = 0; i < n - 1; i++)
if (rc[i] >= thresh)
break;
i1 = i;
for (i = n - 1; i > i1; i--)
if (rc[i] >= thresh)
break;
#if (WILLUSDEBUGX & 8)
// printf("thresh = %g, i1=%d, i2=%d\n",(double)thresh/cmax,i1,i);
#endif
h2 = i - i1 + 1; /* Guaranteed to be >=1 */
return (h2);
}
static void trim_to(int *count, int *i1, int i2, double gaplen)
{
int del, dcount, igaplen, clevel, dlevel, defect_start, last_defect;
igaplen = (int) (gaplen * src_dpi / 72.);
if (igaplen < 1)
igaplen = 1;
/* clevel=(int)(defect_size_pts*src_dpi/72./3.); */
clevel = 0;
dlevel = (int) (pow(defect_size_pts * src_dpi / 72., 2.) * PI / 4. + .5);
del = i2 > (*i1) ? 1 : -1;
defect_start = -1;
last_defect = -1;
dcount = 0;
for (; (*i1) != i2; (*i1) = (*i1) + del) {
if (count[(*i1)] <= clevel) {
dcount = 0; /* Reset defect size */
continue;
}
/* Mark found */
if (dcount == 0) {
if (defect_start >= 0)
last_defect = defect_start;
defect_start = (*i1);
}
dcount += count[(*i1)];
if (dcount >= dlevel) {
if (last_defect >= 0 && abs(defect_start - last_defect) <= igaplen)
(*i1) = last_defect;
else
(*i1) = defect_start;
return;
}
}
if (defect_start < 0)
return;
if (last_defect < 0) {
(*i1) = defect_start;
return;
}
if (abs(defect_start - last_defect) <= igaplen)
(*i1) = last_defect;
else
(*i1) = defect_start;
}
/*
** A region that needs its line spacing and justification analyzed.
**
** The region may be wider than the max desirable region width.
**
** Input: breakinfo should be valid row-break information for the region.
**
** Calls bmpregion_one_row_wrap_and_add() for each text row from the
** breakinfo structure that is within the region.
**
*/
static void bmpregion_analyze_justification_and_line_spacing(BMPREGION *region,
BREAKINFO *breakinfo, MASTERINFO *masterinfo, int *colcount,
int *rowcount, PAGEINFO *pageinfo, int allow_text_wrapping,
double force_scale)
{
int i, i1, i2, ntr, mean_row_gap, maxgap, line_spacing, nls, nch;
BMPREGION *newregion, _newregion;
double *id, *c1, *c2, *ch, *lch, *ls;
int *just, *indented, *short_line;
double capheight, lcheight, fontsize;
int textheight, ragged_right, src_line_spacing;
static char *funcname = "bmpregion_analyze_justification_and_line_spacing";
#if (WILLUSDEBUGX & 1)
printf("@bmpregion_analyze_justification_and_line_spacing");
printf(" (%d,%d) - (%d,%d)\n",region->c1,region->r1,region->c2,region->r2);
printf(" centering = %d\n",breakinfo->centered);
#endif
#if (WILLUSDEBUGX & 2)
breakinfo_echo(breakinfo);
#endif
/* Locate the vertical part indices in the breakinfo structure */
newregion = &_newregion;
breakinfo_sort_by_row_position(breakinfo);
for (i = 0; i < breakinfo->n; i++) {
TEXTROW *textrow;
textrow = &breakinfo->textrow[i];
if ((textrow->r1 + textrow->r2) / 2 >= region->r1)
break;
}
if (i >= breakinfo->n)
return;
i1 = i;
for (; i < breakinfo->n; i++) {
TEXTROW *textrow;
textrow = &breakinfo->textrow[i];
if ((textrow->r1 + textrow->r2) / 2 > region->r2)
break;
}
i2 = i - 1;
if (i2 < i1)
return;
ntr = i2 - i1 + 1;
#if (WILLUSDEBUGX & 1)
printf(" i1=%d, i2=%d, ntr=%d\n",i1,i2,ntr);
#endif
willus_dmem_alloc_warn(13, (void **) &c1, sizeof(double) * 6 * ntr,
funcname, 10);
willus_dmem_alloc_warn(14, (void **) &just, sizeof(int) * 3 * ntr, funcname,
10);
c2 = &c1[ntr];
ch = &c2[ntr];
lch = &ch[ntr];
ls = &lch[ntr];
id = &ls[ntr];
indented = &just[ntr];
short_line = &indented[ntr];
for (i = 0; i < ntr; i++)
id[i] = i;
/* Find baselines / font size */
capheight = lcheight = 0.;
maxgap = -1;
for (nch = nls = 0, i = i1; i <= i2; i++) {
TEXTROW *textrow;
double ar, rh;
int marking_flags;
textrow = &breakinfo->textrow[i];
c1[i - i1] = (double) textrow->c1;
c2[i - i1] = (double) textrow->c2;
if (i < i2 && maxgap < textrow->gap) {
maxgap = textrow->gap;
if (maxgap < 2)
maxgap = 2;
}
if (textrow->c2 < textrow->c1)
ar = 100.;
else
ar = (double) (textrow->r2 - textrow->r1 + 1)
/ (double) (textrow->c2 - textrow->c1 + 1);
rh = (double) (textrow->r2 - textrow->r1 + 1) / src_dpi;
if (i < i2 && ar <= no_wrap_ar_limit
&& rh <= no_wrap_height_limit_inches)
ls[nls++] = breakinfo->textrow[i + 1].r1 - textrow->r1;
if (ar <= no_wrap_ar_limit && rh <= no_wrap_height_limit_inches) {
ch[nch] = textrow->capheight;
lch[nch] = textrow->lcheight;
nch++;
}
/* Mark region w/gray, mark rowbase also */
marking_flags = (i == i1 ? 0 : 1) | (i == i2 ? 0 : 2);
if (i < i2 || textrow->r2 - textrow->rowbase > 1)
marking_flags |= 0x10;
(*newregion) = (*region);
newregion->r1 = textrow->r1;
newregion->r2 = textrow->r2;
newregion->c1 = textrow->c1;
newregion->c2 = textrow->c2;
newregion->rowbase = textrow->rowbase;
mark_source_page(newregion, 5, marking_flags);
#if (WILLUSDEBUGX & 1)
printf(" Row %2d: (%4d,%4d) - (%4d,%4d) rowbase=%4d, lch=%d, h5050=%d, rh=%d\n",i-i1+1,textrow->c1,textrow->r1,textrow->c2,textrow->r2,textrow->rowbase,textrow->lcheight,textrow->h5050,textrow->rowheight);
#endif
}
wrapbmp_set_maxgap(maxgap);
if (nch < 1)
capheight = lcheight = 2; // Err on the side of too small
else {
capheight = median_val(ch, nch);
lcheight = median_val(lch, nch);
}
// printf("capheight = %g, lcheight = %g\n",capheight,lcheight);
bmpregion_is_centered(region, breakinfo, i1, i2, &textheight);
/*
** For 12 pt font:
** Single spacing is 14.66 pts (Calibri), 13.82 pts (Times), 13.81 pts (Arial)
** Size of cap letter is 7.7 pts (Calibri), 8.1 pts (Times), 8.7 pts (Arial)
** Size of small letter is 5.7 pts (Calibri), 5.6 pts (Times), 6.5 pts (Arial)
** Mean line spacing = 1.15 - 1.22 (~1.16)
** Mean cap height = 0.68
** Mean small letter height = 0.49
*/
fontsize = (capheight + lcheight) / 1.17;
// printf("font size = %g pts.\n",(fontsize/src_dpi)*72.);
/*
** Set line spacing for this region
*/
if (nls > 0)
src_line_spacing = median_val(ls, nls);
else
src_line_spacing = fontsize * 1.2;
if (vertical_line_spacing < 0
&& src_line_spacing
<= fabs(vertical_line_spacing) * fontsize * 1.16)
line_spacing = src_line_spacing;
else
line_spacing = fabs(vertical_line_spacing) * fontsize * 1.16;
#if (WILLUSDEBUGX & 1)
printf(" font size = %.2f pts = %d pixels\n",(fontsize/src_dpi)*72.,(int)(fontsize+.5));
printf(" src_line_spacing = %d, line_spacing = %d\n",src_line_spacing,line_spacing);
#endif
/*
if (ntr==1)
rheight= (int)((breakinfo->textrow[i1].r2 - breakinfo->textrow[i1].r1)*1.25+.5);
else
rheight = (int)((double)(breakinfo->textrow[i2].rowbase - breakinfo->textrow[i1].rowbase)/(ntr-1)+.5);
*/
mean_row_gap = line_spacing - textheight;
if (mean_row_gap <= 1)
mean_row_gap = 1;
/* Try to figure out if we have a ragged right edge */
if (ntr < 3)
ragged_right = 1;
else {
int flushcount;
if (src_left_to_right) {
for (flushcount = i = 0; i < ntr; i++) {
#if (WILLUSDEBUGX & 1)
printf(" flush_factors[%d] = %g (<.5), %g in (<.1)\n",
i,(double)(region->c2-c2[i])/textheight,(double)(region->c2-c2[i])/src_dpi);
#endif
if ((double) (region->c2 - c2[i]) / textheight < 0.5
&& (double) (region->c2 - c2[i]) / src_dpi < 0.1)
flushcount++;
}
} else {
for (flushcount = i = 0; i < ntr; i++) {
#if (WILLUSDEBUGX & 1)
printf(" flush_factors[%d] = %g (<.5), %g in (<.1)\n",
i,(double)(c1[i]-region->c1)/textheight,(double)(c1[i]-region->c1)/src_dpi);
#endif
if ((double) (c1[i] - region->c1) / textheight < 0.5
&& (double) (c1[i] - region->c1) / src_dpi < 0.1)
flushcount++;
}
}
ragged_right = (flushcount <= ntr / 2);
/*
if (src_left_to_right)
{
sortxyd(c2,id,ntr);
del = region->c2 - c2[ntr-1-ntr/3];
sortxyd(id,c2,ntr);
}
else
{
sortxyd(c1,id,ntr);
del = c1[ntr/3] - region->c1;
sortxyd(id,c1,ntr);
}
del /= textheight;
printf("del=%g\n",del);
ragged_right = (del > 0.5);
*/
}
#if (WILLUSDEBUGX & 1)
printf("ragged_right=%d\n",ragged_right);
#endif
/* Store justification and other info line by line */
for (i = i1; i <= i2; i++) {
double indent1, del;
double i1f, ilfi, i2f, ilf, ifmin, dif;
int centered;
TEXTROW *textrow;
textrow = &breakinfo->textrow[i];
i1f = (double) (c1[i - i1] - region->c1)
/ (region->c2 - region->c1 + 1);
i2f = (double) (region->c2 - c2[i - i1])
/ (region->c2 - region->c1 + 1);
ilf = src_left_to_right ? i1f : i2f;
ilfi = ilf * (region->c2 - region->c1 + 1) / src_dpi; /* Indent in inches */
ifmin = i1f < i2f ? i1f : i2f;
dif = fabs(i1f - i2f);
if (ifmin < .01)
ifmin = 0.01;
if (src_left_to_right)
indent1 = (double) (c1[i - i1] - region->c1) / textheight;
else
indent1 = (double) (region->c2 - c2[i - i1]) / textheight;
// printf(" row %2d: indent1=%g\n",i-i1,indent1);
if (!breakinfo->centered) {
indented[i - i1] = (indent1 > 0.5 && ilfi < 1.2 && ilf < .25);
centered =
(!indented[i - i1] && indent1 > 1.0 && dif / ifmin < 0.5);
} else {
centered = (dif < 0.1 || dif / ifmin < 0.5);
indented[i - i1] = (indent1 > 0.5 && ilfi < 1.2 && ilf < .25
&& !centered);
}
#if (WILLUSDEBUGX & 1)
printf("Indent %d: %d. indent1=%g, ilf=%g, centered=%d\n",i-i1+1,indented[i-i1],indent1,ilf,centered);
printf(" indent1=%g, i1f=%g, i2f=%g\n",indent1,i1f,i2f);
#endif
if (centered)
just[i - i1] = 4;
else {
/*
** The .01 favors left justification over right justification in
** close cases.
*/
if (src_left_to_right)
just[i - i1] = indented[i - i1] || (i1f < i2f + .01) ? 0 : 8;
else
just[i - i1] = indented[i - i1] || (i2f < i1f + .01) ? 8 : 0;
}
if (src_left_to_right)
del = (double) (region->c2 - textrow->c2);
else
del = (double) (textrow->c1 - region->c1);
/* Should we keep wrapping after this line? */
if (!ragged_right)
short_line[i - i1] = (del / textheight > 0.5);
else
short_line[i - i1] = (del / (region->c2 - region->c1) > 0.25);
/* If this row is a bigger/smaller row (font) than the next row, don't wrap. */
if (!short_line[i - i1] && i < i2) {
TEXTROW *t1;
t1 = &breakinfo->textrow[i + 1];
if ((textrow->h5050 > t1->h5050 * 1.5
|| textrow->h5050 * 1.5 < t1->h5050)
&& (i == 0
|| (i > 0
&& (textrow->rowheight > t1->rowheight * 1.5
|| textrow->rowheight * 1.5
< t1->rowheight))))
short_line[i - i1] = 1;
}
if (!ragged_right)
just[i - i1] |= 0x40;
#if (WILLUSDEBUGX & 1)
printf(" just[%d]=0x%02X, shortline[%d]=%d\n",i-i1,just[i-i1],i-i1,short_line[i-i1]);
printf(" textrow->c2=%d, region->c2=%d, del=%g, textheight=%d\n",textrow->c2,region->c2,del,textheight);
#endif
/* If short line, it should still be fully justified if it is wrapped. */
/*
if (short_line[i-i1])
just[i-i1] = (just[i-i1]&0xf)|0x60;
*/
}
/*
{
double mean1,mean2,stdev1,stdev2;
array_mean(c1,ntr,&mean1,&stdev1);
array_mean(c2,ntr,&mean2,&stdev2);
printf("Mean c1, c2 = %g, %g; stddevs = %g, %g\n",mean1,mean2,stdev1,stdev2);
printf("textheight = %d, line_spacing = %d\n",textheight,line_spacing);
}
*/
for (i = i1; i <= i2; i++) {
TEXTROW *textrow;
int justflags, trimflags, centered, marking_flags, gap;
#if (WILLUSDEBUGX & 1)
aprintf("Row " ANSI_YELLOW "%d of %d" ANSI_NORMAL " (wrap=%d)\n",i-i1+1,i2-i1+1,allow_text_wrapping);
#endif
textrow = &breakinfo->textrow[i];
(*newregion) = (*region);
newregion->r1 = textrow->r1;
newregion->r2 = textrow->r2;
/* The |3 tells it to use the user settings for left/right/center */
justflags = just[i - i1] | 0x3;
centered = ((justflags & 0xc) == 4);
#if (WILLUSDEBUGX & 1)
printf(" justflags[%d]=0x%2X, centered=%d, indented=%d\n",i-i1,justflags,centered,indented[i-i1]);
#endif
if (allow_text_wrapping) {
/* If this line is indented or if the justification has changed, */
/* then start a new line. */
if (centered || indented[i - i1]
|| (i > i1
&& (just[i - i1] & 0xc) != (just[i - i1 - 1] & 0xc))) {
#ifdef WILLUSDEBUG
printf("wrapflush4\n");
#endif
wrapbmp_flush(masterinfo, 0, pageinfo, 1);
}
#ifdef WILLUSDEBUG
printf(" c1=%d, c2=%d\n",newregion->c1,newregion->c2);
#endif
marking_flags = 0xc | (i == i1 ? 0 : 1) | (i == i2 ? 0 : 2);
bmpregion_one_row_wrap_and_add(newregion, breakinfo, i, i1, i2,
masterinfo, justflags, colcount, rowcount, pageinfo,
line_spacing, mean_row_gap, textrow->rowbase, marking_flags,
indented[i - i1]);
if (centered || short_line[i - i1]) {
#ifdef WILLUSDEBUG
printf("wrapflush5\n");
#endif
wrapbmp_flush(masterinfo, 0, pageinfo, 2);
}
continue;
}
#ifdef WILLUSDEBUG
printf("wrapflush5a\n");
#endif
wrapbmp_flush(masterinfo, 0, pageinfo, 1);
/* If default justifications, ignore all analysis and just center it. */
if (dst_justify < 0 && dst_fulljustify < 0) {
newregion->c1 = region->c1;
newregion->c2 = region->c2;
justflags = 0xad; /* Force centered region, no justification */
trimflags = 0x80;
} else
trimflags = 0;
/* No wrapping: text wrap, trim flags, vert breaks, fscale, just */
bmpregion_add(newregion, breakinfo, masterinfo, 0, trimflags, 0,
force_scale, justflags, 5, colcount, rowcount, pageinfo, 0,
textrow->r2 - textrow->rowbase);
if (vertical_line_spacing < 0) {
int gap1;
gap1 = line_spacing - (textrow->r2 - textrow->r1 + 1);
if (i < i2)
gap = textrow->gap > gap1 ? gap1 : textrow->gap;
else {
gap = textrow->rowheight
- (textrow->rowbase + last_rowbase_internal);
if (gap < mean_row_gap / 2.)
gap = mean_row_gap;
}
} else {
gap = line_spacing - (textrow->r2 - textrow->r1 + 1);
if (gap < mean_row_gap / 2.)
gap = mean_row_gap;
}
if (i < i2)
dst_add_gap_src_pixels("No-wrap line", masterinfo, gap);
else {
last_h5050_internal = textrow->h5050;
beginning_gap_internal = gap;
}
}
willus_dmem_free(14, (double **) &just, funcname);
willus_dmem_free(13, (double **) &c1, funcname);
#ifdef WILLUSDEBUG
printf("Done wrap_and_add.\n");
#endif
}
static int bmpregion_is_centered(BMPREGION *region, BREAKINFO *breakinfo,
int i1, int i2, int *th)
{
int j, i, cc, n1, ntr;
int textheight;
#if (WILLUSDEBUGX & 1)
printf("@bmpregion_is_centered: region=(%d,%d) - (%d,%d)\n",region->c1,region->r1,region->c2,region->r2);
printf(" nrows = %d\n",i2-i1+1);
#endif
ntr = i2 - i1 + 1;
for (j = 0; j < 3; j++) {
for (n1 = textheight = 0, i = i1; i <= i2; i++) {
TEXTROW *textrow;
double ar, rh;
textrow = &breakinfo->textrow[i];
if (textrow->c2 < textrow->c1)
ar = 100.;
else
ar = (double) (textrow->r2 - textrow->r1 + 1)
/ (double) (textrow->c2 - textrow->c1 + 1);
rh = (double) (textrow->r2 - textrow->r1 + 1) / src_dpi;
if (j == 2 || (j >= 1 && rh <= no_wrap_height_limit_inches)
|| (j == 0 && rh <= no_wrap_height_limit_inches
&& ar <= no_wrap_ar_limit)) {
textheight += textrow->rowbase - textrow->r1 + 1;
n1++;
}
}
if (n1 > 0)
break;
}
textheight = (int) ((double) textheight / n1 + .5);
if (th != NULL) {
(*th) = textheight;
#if (WILLUSDEBUGX & 1)
printf(" textheight assigned (%d)\n",textheight);
#endif
return (breakinfo->centered);
}
/*
** Does region appear to be centered?
*/
for (cc = 0, i = i1; i <= i2; i++) {
double indent1, indent2;
#if (WILLUSDEBUGX & 1)
printf(" tr[%d].c1,c2 = %d, %d\n",i,breakinfo->textrow[i].c1,breakinfo->textrow[i].c2);
#endif
indent1 = (double) (breakinfo->textrow[i].c1 - region->c1) / textheight;
indent2 = (double) (region->c2 - breakinfo->textrow[i].c2) / textheight;
#if (WILLUSDEBUGX & 1)
printf(" tr[%d].indent1,2 = %g, %g\n",i,indent1,indent2);
#endif
/* If only one line and it spans the entire region, call it centered */
/* Sometimes this won't be the right thing to to. */
if (i1 == i2 && indent1 < .5 && indent2 < .5) {
#if (WILLUSDEBUGX & 1)
printf(" One line default to bigger region (%s).\n",breakinfo->centered?"not centered":"centered");
#endif
return (1);
}
if (fabs(indent1 - indent2) > 1.5) {
#if (WILLUSDEBUGX & 1)
printf(" Region not centered.\n");
#endif
return (0);
}
if (indent1 > 1.0)
cc++;
}
#if (WILLUSDEBUGX & 1)
printf("Region centering: i=%d, i2=%d, cc=%d, ntr=%d\n",i,i2,cc,ntr);
#endif
if (cc > ntr / 2) {
#if (WILLUSDEBUGX & 1)
printf(" Region is centered (enough obviously centered lines).\n");
#endif
return (1);
}
#if (WILLUSDEBUGX & 1)
printf(" Not centered (not enough obviously centered lines).\n");
#endif
return (0);
}
/* array.c */
/*
**
** Compute mean and standard deviation
**
*/
double array_mean(double *a, int n, double *mean, double *stddev)
{
int i;
double sum, avg, sum_sq;
if (n < 1)
return (0.);
for (sum = sum_sq = i = 0; i < n; i++)
sum += a[i];
avg = sum / n;
if (mean != NULL)
(*mean) = avg;
if (stddev != NULL) {
double sum_sq;
for (sum_sq = i = 0; i < n; i++)
sum_sq += (a[i] - avg) * (a[i] - avg);
(*stddev) = sqrt(sum_sq / n);
}
return (avg);
}
/*
** CAUTION: This function re-orders the x[] array!
*/
static double median_val(double *x, int n)
{
int i1, n1;
if (n < 4)
return (array_mean(x, n, NULL, NULL));
sortd(x, n);
if (n == 4) {
n1 = 2;
i1 = 1;
} else if (n == 5) {
n1 = 3;
i1 = 1;
} else {
n1 = n / 3;
i1 = (n - n1) / 2;
}
return (array_mean(&x[i1], n1, NULL, NULL));
}
/*
**
** Searches the region for vertical break points and stores them into
** the BREAKINFO structure.
**
** apsize_in = averaging aperture size in inches. Use -1 for dynamic aperture.
**
*/
static void bmpregion_find_vertical_breaks(BMPREGION *region,
BREAKINFO *breakinfo, int *colcount, int *rowcount, double apsize_in)
{
static char *funcname = "bmpregion_find_vertical_breaks";
int nr, i, brc, brcmin, dtrc, trc, aperture, aperturemax, figrow, labelrow;
int ntr, rhmin_pix;
BMPREGION *newregion, _newregion;
int *rowthresh;
double min_fig_height, max_fig_gap, max_label_height;
min_fig_height = dst_min_figure_height_in;
max_fig_gap = 0.16;
max_label_height = 0.5;
/* Trim region and populate colcount/rowcount arrays */
bmpregion_trim_margins(region, colcount, rowcount, 0xf);
newregion = &_newregion;
(*newregion) = (*region);
if (debug)
printf("@bmpregion_find_vertical_breaks: (%d,%d) - (%d,%d)\n",
region->c1, region->r1, region->c2, region->r2);
/*
** brc = consecutive blank pixel rows
** trc = consecutive non-blank pixel rows
** dtrc = number of non blank pixel rows since last dump
*/
nr = region->r2 - region->r1 + 1;
willus_dmem_alloc_warn(15, (void **) &rowthresh, sizeof(int) * nr, funcname,
10);
brcmin = max_vertical_gap_inches * src_dpi;
aperturemax = (int) (src_dpi / 72. + .5);
if (aperturemax < 2)
aperturemax = 2;
aperture = (int) (src_dpi * apsize_in + .5);
/*
for (i=region->r1;i<=region->r2;i++)
printf("rowcount[%d]=%d\n",i,rowcount[i]);
*/
breakinfo->rhmean_pixels = 0; // Mean text row height
ntr = 0; // Number of text rows
/* Fill rowthresh[] array */
for (dtrc = 0, i = region->r1; i <= region->r2; i++) {
int ii, i1, i2, sum, pt;
if (apsize_in < 0.) {
aperture = (int) (dtrc / 13.7 + .5);
if (aperture > aperturemax)
aperture = aperturemax;
if (aperture < 2)
aperture = 2;
}
i1 = i - aperture / 2;
i2 = i1 + aperture - 1;
if (i1 < region->r1)
i1 = region->r1;
if (i2 > region->r2)
i2 = region->r2;
pt = (int) ((i2 - i1 + 1) * gtr_in * src_dpi + .5); /* pixel count threshold */
if (pt < 1)
pt = 1;
/* Sum over row aperture */
for (sum = 0, ii = i1; ii <= i2; sum += rowcount[ii], ii++)
;
/* Does row have few enough black pixels to be considered blank? */
if ((rowthresh[i - region->r1] = 10 * sum / pt) <= 40) {
if (dtrc > 0) {
breakinfo->rhmean_pixels += dtrc;
ntr++;
}
dtrc = 0;
} else
dtrc++;
}
if (dtrc > 0) {
breakinfo->rhmean_pixels += dtrc;
ntr++;
}
if (ntr > 0)
breakinfo->rhmean_pixels /= ntr;
/*
printf("rhmean=%d (ntr=%d)\n",breakinfo->rhmean_pixels,ntr);
{
FILE *f;
static int count=0;
f=fopen("rthresh.ep",count==0?"w":"a");
count++;
for (i=region->r1;i<=region->r2;i++)
nprintf(f,"%d\n",rowthresh[i-region->r1]);
nprintf(f,"//nc\n");
fclose(f);
}
*/
/* Minimum text row height required (pixels) */
rhmin_pix = breakinfo->rhmean_pixels / 3;
if (rhmin_pix < .04 * src_dpi)
rhmin_pix = .04 * src_dpi;
if (rhmin_pix > .13 * src_dpi)
rhmin_pix = .13 * src_dpi;
if (rhmin_pix < 1)
rhmin_pix = 1;
/*
for (rmax=region->r2;rmax>region->r1;rmax--)
if (rowthresh[rmax-region->r1]>10)
break;
*/
/* Look for "row" gaps in the region so that it can be broken into */
/* multiple "rows". */
breakinfo->n = 0;
for (labelrow = figrow = -1, dtrc = trc = brc = 0, i = region->r1;
i <= region->r2; i++) {
/* Does row have few enough black pixels to be considered blank? */
if (rowthresh[i - region->r1] <= 10) {
trc = 0;
brc++;
/*
** Max allowed white space between rows = max_vertical_gap_inches
*/
if (dtrc == 0) {
if (brc > brcmin)
newregion->r1++;
continue;
}
/*
** Big enough blank gap, so add one row / line
*/
if (dtrc + brc >= rhmin_pix) {
int i0, iopt;
double region_height_inches;
double gap_inches;
if (dtrc < src_dpi * 0.02)
dtrc = src_dpi * 0.02;
if (dtrc < 2)
dtrc = 2;
/* Look for more optimum point */
for (i0 = iopt = i; i <= region->r2 && i - i0 < dtrc; i++) {
if (rowthresh[i - region->r1]
< rowthresh[iopt - region->r1]) {
iopt = i;
if (rowthresh[i - region->r1] == 0)
break;
}
if (rowthresh[i - region->r1] > 100)
break;
}
/* If at end of region and haven't found perfect break, stay at end */
if (i > region->r2 && rowthresh[iopt - region->r1] > 0)
i = region->r2;
else
i = iopt;
newregion->r2 = i - 1;
region_height_inches = (double) (newregion->r2 - newregion->r1
+ 1) / src_dpi;
/* Could this region be a figure? */
if (figrow < 0 && region_height_inches >= min_fig_height) {
/* If so, set figrow and don't process it yet. */
figrow = newregion->r1;
labelrow = -1;
newregion->r1 = i;
dtrc = trc = 0;
brc = 1;
continue;
}
/* Are we processing a figure? */
if (figrow >= 0) {
/* Compute most recent gap */
if (labelrow >= 0)
gap_inches = (double) (labelrow - newregion->r1)
/ src_dpi;
else
gap_inches = -1.;
/* If gap and region height are small enough, tack them on to the figure. */
if (region_height_inches < max_label_height
&& gap_inches > 0. && gap_inches < max_fig_gap)
newregion->r1 = figrow;
else {
/* Not small enough--dump the previous figure. */
newregion->r2 = newregion->r1 - 1;
newregion->r1 = figrow;
newregion->c1 = region->c1;
newregion->c2 = region->c2;
bmpregion_trim_margins(newregion, colcount, rowcount,
0x1f);
if (newregion->r2 > newregion->r1)
textrow_assign_bmpregion(
&breakinfo->textrow[breakinfo->n++],
newregion);
if (gap_inches > 0. && gap_inches < max_fig_gap) {
/* This new region might be a figure--set it as the new figure */
/* and don't dump it yet. */
figrow = newregion->r2 + 1;
labelrow = -1;
newregion->r1 = i;
dtrc = trc = 0;
brc = 1;
continue;
} else {
newregion->r1 = newregion->r2 + 1;
newregion->r2 = i - 1;
}
}
/* Cancel figure processing */
figrow = -1;
labelrow = -1;
}
/*
if (newregion->r2 >= rmax)
i=newregion->r2=region->r2;
*/
newregion->c1 = region->c1;
newregion->c2 = region->c2;
bmpregion_trim_margins(newregion, colcount, rowcount, 0x1f);
if (newregion->r2 > newregion->r1)
textrow_assign_bmpregion(
&breakinfo->textrow[breakinfo->n++], newregion);
newregion->r1 = i;
dtrc = trc = 0;
brc = 1;
}
} else {
if (figrow >= 0 && labelrow < 0)
labelrow = i;
dtrc++;
trc++;
brc = 0;
}
}
newregion->r2 = region->r2;
if (dtrc > 0 && newregion->r2 - newregion->r1 + 1 > 0) {
/* If we were processing a figure, include it. */
if (figrow >= 0)
newregion->r1 = figrow;
newregion->c1 = region->c1;
newregion->c2 = region->c2;
bmpregion_trim_margins(newregion, colcount, rowcount, 0x1f);
if (newregion->r2 > newregion->r1)
textrow_assign_bmpregion(&breakinfo->textrow[breakinfo->n++],
newregion);
}
/* Compute gaps between rows and row heights */
breakinfo_compute_row_gaps(breakinfo, region->r2);
willus_dmem_free(15, (double **) &rowthresh, funcname);
}
static void textrow_assign_bmpregion(TEXTROW *textrow, BMPREGION *region)
{
textrow->r1 = region->r1;
textrow->r2 = region->r2;
textrow->c1 = region->c1;
textrow->c2 = region->c2;
textrow->rowbase = region->rowbase;
textrow->lcheight = region->lcheight;
textrow->capheight = region->capheight;
textrow->h5050 = region->h5050;
}
static void breakinfo_compute_row_gaps(BREAKINFO *breakinfo, int r2)
{
int i, n;
n = breakinfo->n;
if (n <= 0)
return;
breakinfo->textrow[0].rowheight = breakinfo->textrow[0].r2
- breakinfo->textrow[0].r1;
for (i = 0; i < n - 1; i++)
breakinfo->textrow[i].gap = breakinfo->textrow[i + 1].r1
- breakinfo->textrow[i].rowbase - 1;
/*
breakinfo->textrow[i].rowheight = breakinfo->textrow[i+1].r1 - breakinfo->textrow[i].r1;
*/
for (i = 1; i < n; i++)
breakinfo->textrow[i].rowheight = breakinfo->textrow[i].rowbase
- breakinfo->textrow[i - 1].rowbase;
breakinfo->textrow[n - 1].gap = r2 - breakinfo->textrow[n - 1].rowbase;
}
static void breakinfo_compute_col_gaps(BREAKINFO *breakinfo, int c2)
{
int i, n;
n = breakinfo->n;
if (n <= 0)
return;
for (i = 0; i < n - 1; i++) {
breakinfo->textrow[i].gap = breakinfo->textrow[i + 1].c1
- breakinfo->textrow[i].c2 - 1;
breakinfo->textrow[i].rowheight = breakinfo->textrow[i + 1].c1
- breakinfo->textrow[i].c1;
}
breakinfo->textrow[n - 1].gap = c2 - breakinfo->textrow[n - 1].c2;
breakinfo->textrow[n - 1].rowheight = breakinfo->textrow[n - 1].c2
- breakinfo->textrow[n - 1].c1;
}
static void breakinfo_remove_small_col_gaps(BREAKINFO *breakinfo, int lcheight,
double mingap)
{
int i, j;
if (mingap < word_spacing)
mingap = word_spacing;
for (i = 0; i < breakinfo->n - 1; i++) {
double gap;
gap = (double) breakinfo->textrow[i].gap / lcheight;
if (gap >= mingap)
continue;
breakinfo->textrow[i].c2 = breakinfo->textrow[i + 1].c2;
breakinfo->textrow[i].gap = breakinfo->textrow[i + 1].gap;
if (breakinfo->textrow[i + 1].r1 < breakinfo->textrow[i].r1)
breakinfo->textrow[i].r1 = breakinfo->textrow[i + 1].r1;
if (breakinfo->textrow[i + 1].r2 > breakinfo->textrow[i].r2)
breakinfo->textrow[i].r2 = breakinfo->textrow[i + 1].r2;
for (j = i + 1; j < breakinfo->n - 1; j++)
breakinfo->textrow[j] = breakinfo->textrow[j + 1];
breakinfo->n--;
i--;
}
}
static void breakinfo_remove_small_rows(BREAKINFO *breakinfo, double fracrh,
double fracgap, BMPREGION *region, int *colcount, int *rowcount)
{
int i, j, mg, mh, mg0, mg1;
int c1, c2, nc;
int *rh, *gap;
static char *funcname = "breakinfo_remove_small_rows";
#if (WILLUSDEBUGX & 2)
printf("@breakinfo_remove_small_rows(fracrh=%g,fracgap=%g)\n",fracrh,fracgap);
#endif
if (breakinfo->n < 2)
return;
c1 = region->c1;
c2 = region->c2;
nc = c2 - c1 + 1;
willus_dmem_alloc_warn(16, (void **) &rh, 2 * sizeof(int) * breakinfo->n,
funcname, 10);
gap = &rh[breakinfo->n];
for (i = 0; i < breakinfo->n; i++) {
rh[i] = breakinfo->textrow[i].r2 - breakinfo->textrow[i].r1 + 1;
if (i < breakinfo->n - 1)
gap[i] = breakinfo->textrow[i].gap;
}
sorti(rh, breakinfo->n);
sorti(gap, breakinfo->n - 1);
mh = rh[breakinfo->n / 2];
mh *= fracrh;
if (mh < 1)
mh = 1;
mg0 = gap[(breakinfo->n - 1) / 2];
mg = mg0 * fracgap;
mg1 = mg0 * 0.7;
if (mg < 1)
mg = 1;
#if (WILLUSDEBUGX & 2)
printf("mh = %d x %g = %d\n",rh[breakinfo->n/2],fracrh,mh);
printf("mg = %d x %g = %d\n",gap[breakinfo->n/2],fracgap,mg);
#endif
for (i = 0; i < breakinfo->n; i++) {
TEXTROW *textrow;
int trh, gs1, gs2, g1, g2, gap_is_big, row_too_small;
double m1, m2, row_width_inches;
textrow = &breakinfo->textrow[i];
trh = textrow->r2 - textrow->r1 + 1;
if (i == 0) {
g1 = mg0 + 1;
gs1 = mg + 1;
} else {
g1 = textrow->r1 - breakinfo->textrow[i - 1].r2 - 1;
gs1 = breakinfo->textrow[i - 1].gap;
}
if (i == breakinfo->n - 1) {
g2 = mg0 + 1;
gs2 = mg + 1;
} else {
g2 = breakinfo->textrow[i + 1].r1 - textrow->r2 - 1;
gs2 = breakinfo->textrow[i].gap;
}
#if (WILLUSDEBUGX & 2)
printf(" rowheight[%d] = %d, mh=%d, gs1=%d, gs2=%d\n",i,trh,gs1,gs2);
#endif
gap_is_big = (trh >= mh || (gs1 >= mg && gs2 >= mg));
/*
** Is the row width small and centered? If so, it should probably
** be attached to its nearest neighbor--it's usually a fragment of
** an equation or a table/figure.
*/
row_width_inches = (double) (textrow->c2 - textrow->c1 + 1) / src_dpi;
m1 = fabs(textrow->c1 - c1) / nc;
m2 = fabs(textrow->c2 - c2) / nc;
row_too_small = m1 > 0.1 && m2 > 0.1
&& row_width_inches < little_piece_threshold_inches
&& (g1 <= mg1 || g2 <= mg1);
#if (WILLUSDEBUGX & 2)
printf(" m1=%g, m2=%g, rwi=%g, g1=%d, g2=%d, mg0=%d\n",m1,m2,row_width_inches,g1,g2,mg0);
#endif
if (gap_is_big && !row_too_small)
continue;
#if (WILLUSDEBUGX & 2)
printf(" row[%d] to be combined w/next row.\n",i);
#endif
if (row_too_small) {
if (g1 < g2)
i--;
} else {
if (gs1 < gs2)
i--;
}
/*
printf("Removing row. nrows=%d, rh=%d, gs1=%d, gs2=%d\n",breakinfo->n,trh,gs1,gs2);
printf(" mh = %d, mg = %d\n",rh[breakinfo->n/2],gap[(breakinfo->n-1)/2]);
*/
breakinfo->textrow[i].r2 = breakinfo->textrow[i + 1].r2;
if (breakinfo->textrow[i + 1].c2 > breakinfo->textrow[i].c2)
breakinfo->textrow[i].c2 = breakinfo->textrow[i + 1].c2;
if (breakinfo->textrow[i + 1].c1 < breakinfo->textrow[i].c1)
breakinfo->textrow[i].c1 = breakinfo->textrow[i + 1].c1;
/* Re-compute rowbase, capheight, lcheight */
{
BMPREGION newregion;
newregion = (*region);
newregion.c1 = breakinfo->textrow[i].c1;
newregion.c2 = breakinfo->textrow[i].c2;
newregion.r1 = breakinfo->textrow[i].r1;
newregion.r2 = breakinfo->textrow[i].r2;
bmpregion_trim_margins(&newregion, colcount, rowcount, 0x1f);
newregion.c1 = breakinfo->textrow[i].c1;
newregion.c2 = breakinfo->textrow[i].c2;
newregion.r1 = breakinfo->textrow[i].r1;
newregion.r2 = breakinfo->textrow[i].r2;
textrow_assign_bmpregion(&breakinfo->textrow[i], &newregion);
}
for (j = i + 1; j < breakinfo->n - 1; j++)
breakinfo->textrow[j] = breakinfo->textrow[j + 1];
breakinfo->n--;
i--;
}
willus_dmem_free(16, (double **) &rh, funcname);
}
static void breakinfo_alloc(int index, BREAKINFO *breakinfo, int nrows)
{
static char *funcname = "breakinfo_alloc";
willus_dmem_alloc_warn(index, (void **) &breakinfo->textrow,
sizeof(TEXTROW) * (nrows / 2 + 2), funcname, 10);
}
static void breakinfo_free(int index, BREAKINFO *breakinfo)
{
static char *funcname = "breakinfo_free";
willus_dmem_free(index, (double **) &breakinfo->textrow, funcname);
}
static void breakinfo_sort_by_gap(BREAKINFO *breakinfo)
{
int n, top, n1;
TEXTROW *x, x0;
x = breakinfo->textrow;
n = breakinfo->n;
if (n < 2)
return;
top = n / 2;
n1 = n - 1;
while (1) {
if (top > 0) {
top--;
x0 = x[top];
} else {
x0 = x[n1];
x[n1] = x[0];
n1--;
if (!n1) {
x[0] = x0;
return;
}
}
{
int parent, child;
parent = top;
child = top * 2 + 1;
while (child <= n1) {
if (child < n1 && x[child].gap < x[child + 1].gap)
child++;
if (x0.gap < x[child].gap) {
x[parent] = x[child];
parent = child;
child += (parent + 1);
} else
break;
}
x[parent] = x0;
}
}
}
static void breakinfo_sort_by_row_position(BREAKINFO *breakinfo)
{
int n, top, n1;
TEXTROW *x, x0;
x = breakinfo->textrow;
n = breakinfo->n;
if (n < 2)
return;
top = n / 2;
n1 = n - 1;
while (1) {
if (top > 0) {
top--;
x0 = x[top];
} else {
x0 = x[n1];
x[n1] = x[0];
n1--;
if (!n1) {
x[0] = x0;
return;
}
}
{
int parent, child;
parent = top;
child = top * 2 + 1;
while (child <= n1) {
if (child < n1 && x[child].r1 < x[child + 1].r1)
child++;
if (x0.r1 < x[child].r1) {
x[parent] = x[child];
parent = child;
child += (parent + 1);
} else
break;
}
x[parent] = x0;
}
}
}
/*
** Add a vertically-contiguous rectangular region to the destination bitmap.
** The rectangular region may be broken up horizontally (wrapped).
*/
static void bmpregion_one_row_find_breaks(BMPREGION *region,
BREAKINFO *breakinfo, int *colcount, int *rowcount, int add_to_dbase)
{
int nc, i, mingap, col0, dr, thlow, thhigh;
int *bp;
BMPREGION *newregion, _newregion;
static char *funcname = "bmpregion_one_row_find_breaks";
if (debug)
printf("@bmpregion_one_row_find_breaks(%d,%d)-(%d,%d)\n", region->c1,
region->r1, region->c2, region->r2);
newregion = &_newregion;
(*newregion) = (*region);
bmpregion_trim_margins(newregion, colcount, rowcount, 0x1f);
region->lcheight = newregion->lcheight;
region->capheight = newregion->capheight;
region->rowbase = newregion->rowbase;
region->h5050 = newregion->h5050;
nc = newregion->c2 - newregion->c1 + 1;
breakinfo->n = 0;
if (nc < 6)
return;
/*
** Look for "space-sized" gaps, i.e. gaps that would occur between words.
** Use this as pixel counting aperture.
*/
dr = newregion->lcheight;
mingap = dr * word_spacing * 0.8;
if (mingap < 2)
mingap = 2;
/*
** Find places where there are gaps (store in bp array)
** Could do this more intelligently--maybe calculate a histogram?
*/
willus_dmem_alloc_warn(18, (void **) &bp, sizeof(int) * nc, funcname, 10);
for (i = 0; i < nc; i++)
bp[i] = 0;
if (src_left_to_right) {
for (i = newregion->c1; i <= newregion->c2; i++) {
int i1, i2, pt, sum, ii;
i1 = i - mingap / 2;
i2 = i1 + mingap - 1;
if (i1 < newregion->c1)
i1 = newregion->c1;
if (i2 > newregion->c2)
i2 = newregion->c2;
pt = (int) ((i2 - i1 + 1) * gtw_in * src_dpi + .5);
if (pt < 1)
pt = 1;
for (sum = 0, ii = i1; ii <= i2; ii++, sum += colcount[ii])
;
bp[i - newregion->c1] = 10 * sum / pt;
}
} else {
for (i = newregion->c2; i >= newregion->c1; i--) {
int i1, i2, pt, sum, ii;
i1 = i - mingap / 2;
i2 = i1 + mingap - 1;
if (i1 < newregion->c1)
i1 = newregion->c1;
if (i2 > newregion->c2)
i2 = newregion->c2;
pt = (int) ((i2 - i1 + 1) * gtw_in * src_dpi + .5);
if (pt < 1)
pt = 1;
for (sum = 0, ii = i1; ii <= i2; ii++, sum += colcount[ii])
;
bp[i - newregion->c1] = 10 * sum / pt;
}
}
#if (WILLUSDEBUGX & 4)
if (region->r1 > 3699 && region->r1<3750)
{
static int a=0;
FILE *f;
f=fopen("outbp.ep",a==0?"w":"a");
a++;
fprintf(f,"/sa l \"(%d,%d)-(%d,%d) lch=%d\" 2\n",region->c1,region->r1,region->c2,region->r2,region->lcheight);
for (i=0;i<nc;i++)
fprintf(f,"%d\n",bp[i]);
fprintf(f,"//nc\n");
fclose(f);
}
#endif
thlow = 10;
thhigh = 50;
/*
** Break into pieces
*/
for (col0 = newregion->c1; col0 <= newregion->c2; col0++) {
int copt, c0;
BMPREGION xregion;
xregion = (*newregion);
xregion.c1 = col0;
for (; col0 <= newregion->c2; col0++)
if (bp[col0 - newregion->c1] >= thhigh)
break;
if (col0 > newregion->c2)
break;
for (col0++; col0 <= newregion->c2; col0++)
if (bp[col0 - newregion->c1] < thlow)
break;
for (copt = c0 = col0; col0 <= newregion->c2 && col0 - c0 <= dr;
col0++) {
if (bp[col0 - newregion->c1] < bp[copt - newregion->c1])
copt = col0;
if (bp[col0 - newregion->c1] > thhigh)
break;
}
if (copt > newregion->c2)
copt = newregion->c2;
xregion.c2 = copt;
if (xregion.c2 - xregion.c1 < 2)
continue;
bmpregion_trim_margins(&xregion, colcount, rowcount, 0x1f);
textrow_assign_bmpregion(&breakinfo->textrow[breakinfo->n++], &xregion);
col0 = copt;
if (copt == newregion->c2)
break;
}
breakinfo_compute_col_gaps(breakinfo, newregion->c2);
willus_dmem_free(18, (double **) &bp, funcname);
/* Remove small gaps */
{
double median_gap;
word_gaps_add(add_to_dbase ? breakinfo : NULL, region->lcheight,
&median_gap);
breakinfo_remove_small_col_gaps(breakinfo, region->lcheight,
median_gap / 1.9);
}
}
/*
** pi = preserve indentation
*/
static void bmpregion_one_row_wrap_and_add(BMPREGION *region,
BREAKINFO *rowbreakinfo, int index, int i1, int i2,
MASTERINFO *masterinfo, int justflags, int *colcount, int *rowcount,
PAGEINFO *pageinfo, int line_spacing, int mean_row_gap, int rowbase,
int marking_flags, int pi)
{
int nc, nr, i, i0, gappix;
double aspect_ratio, region_height;
BREAKINFO *colbreaks, _colbreaks;
BMPREGION *newregion, _newregion;
#if (WILLUSDEBUGX & 4)
printf("@bmpregion_one_row_wrap_and_add, index=%d, i1=%d, i2=%d\n",index,i1,i2);
#endif
newregion = &_newregion;
(*newregion) = (*region);
bmpregion_trim_margins(newregion, colcount, rowcount, 0xf);
nc = newregion->c2 - newregion->c1 + 1;
nr = newregion->r2 - newregion->r1 + 1;
if (nc < 6)
return;
aspect_ratio = (double) nr / nc;
region_height = (double) nr / src_dpi;
if (aspect_ratio > no_wrap_ar_limit
&& region_height > no_wrap_height_limit_inches) {
newregion->r1 = region->r1;
newregion->r2 = region->r2;
#ifdef WILLUSDEBUG
printf("wrapflush6\n");
#endif
wrapbmp_flush(masterinfo, 0, pageinfo, 1);
if (index > i1)
dst_add_gap_src_pixels("Tall region", masterinfo,
rowbreakinfo->textrow[index - 1].gap);
bmpregion_add(newregion, rowbreakinfo, masterinfo, 0, 0xf, 0, -1.0, 0,
2, colcount, rowcount, pageinfo, 0xf,
rowbreakinfo->textrow[index].r2
- rowbreakinfo->textrow[index].rowbase);
if (index < i2)
gap_override_internal = rowbreakinfo->textrow[index].gap;
return;
}
colbreaks = &_colbreaks;
colbreaks->textrow = NULL;
breakinfo_alloc(106, colbreaks, newregion->c2 - newregion->c1 + 1);
bmpregion_one_row_find_breaks(newregion, colbreaks, colcount, rowcount, 1);
if (pi && colbreaks->n > 0) {
if (src_left_to_right)
colbreaks->textrow[0].c1 = region->c1;
else
colbreaks->textrow[colbreaks->n - 1].c2 = region->c2;
}
/*
hs=0.;
for (i=0;i<colbreaks->n;i++)
hs += (colbreaks->textrow[i].r2-colbreaks->textrow[i].r1);
hs /= colbreaks->n;
*/
/*
** Find appropriate letter height to use for word spacing
*/
{
double median_gap;
word_gaps_add(NULL, newregion->lcheight, &median_gap);
gappix = (int) (median_gap * newregion->lcheight + .5);
}
#if (WILLUSDEBUGX & 4)
printf("Before small gap removal, column breaks:\n");
breakinfo_echo(colbreaks);
#endif
#if (WILLUSDEBUGX & 4)
printf("After small gap removal, column breaks:\n");
breakinfo_echo(colbreaks);
#endif
if (show_marked_source)
for (i = 0; i < colbreaks->n; i++) {
BMPREGION xregion;
xregion = (*newregion);
xregion.c1 = colbreaks->textrow[i].c1;
xregion.c2 = colbreaks->textrow[i].c2;
mark_source_page(&xregion, 2, marking_flags);
}
#if (WILLUSDEBUGX & 4)
for (i=0;i<colbreaks->n;i++)
printf(" colbreak[%d] = %d - %d\n",i,colbreaks->textrow[i].c1,colbreaks->textrow[i].c2);
#endif
/* Maybe skip gaps < 0.5*median_gap or collect gap/rowheight ratios and skip small gaps */
/* (Could be thrown off by full-justified articles where some lines have big gaps.) */
/* Need do call a separate function that removes these gaps. */
for (i0 = 0; i0 < colbreaks->n;) {
int i1, i2, toolong, rw, remaining_width_pixels;
BMPREGION reg;
toolong = 0; /* Avoid compiler warning */
for (i = i0; i < colbreaks->n; i++) {
int wordgap;
wordgap = wrapbmp_ends_in_hyphen() ? 0 : gappix;
i1 = src_left_to_right ? i0 : colbreaks->n - 1 - i;
i2 = src_left_to_right ? i : colbreaks->n - 1 - i0;
rw = (colbreaks->textrow[i2].c2 - colbreaks->textrow[i1].c1 + 1);
remaining_width_pixels = wrapbmp_remaining();
toolong = (rw + wordgap > remaining_width_pixels);
#if (WILLUSDEBUGX & 4)
printf(" i1=%d, i2=%d, rw=%d, rw+gap=%d, remainder=%d, toolong=%d\n",i1,i2,rw,rw+wordgap,remaining_width_pixels,toolong);
#endif
/*
** If we're too long with just one word and there is already
** stuff on the queue, then flush it and re-evaluate.
*/
if (i == i0 && toolong && wrapbmp_width() > 0) {
#ifdef WILLUSDEBUG
printf("wrapflush8\n");
#endif
wrapbmp_flush(masterinfo, 1, pageinfo, 0);
i--;
continue;
}
/*
** If we're not too long and we're not done yet, add another word.
*/
if (i < colbreaks->n - 1 && !toolong)
continue;
/*
** Add the regions from i0 to i (or i0 to i-1)
*/
break;
}
if (i > i0 && toolong)
i--;
i1 = src_left_to_right ? i0 : colbreaks->n - 1 - i;
i2 = src_left_to_right ? i : colbreaks->n - 1 - i0;
reg = (*newregion);
reg.c1 = colbreaks->textrow[i1].c1;
reg.c2 = colbreaks->textrow[i2].c2;
#if (WILLUSDEBUGX & 4)
printf(" Adding i1=%d to i2=%d\n",i1,i2);
#endif
/* Trim the word top/bottom */
bmpregion_trim_margins(&reg, colcount, rowcount, 0xc);
reg.c1 = colbreaks->textrow[i1].c1;
reg.c2 = colbreaks->textrow[i2].c2;
reg.lcheight = newregion->lcheight;
reg.capheight = newregion->capheight;
reg.rowbase = newregion->rowbase;
reg.h5050 = newregion->h5050;
if (reg.r1 > reg.rowbase)
reg.r1 = reg.rowbase;
if (reg.r2 < reg.rowbase)
reg.r2 = reg.rowbase;
/* Add it to the existing line queue */
wrapbmp_add(&reg, gappix, line_spacing, rowbase, mean_row_gap,
justflags);
if (toolong) {
#ifdef WILLUSDEBUG
printf("wrapflush7\n");
#endif
wrapbmp_flush(masterinfo, 1, pageinfo, 0);
}
i0 = i + 1;
}
breakinfo_free(106, colbreaks);
}
static WILLUSBITMAP _wrapbmp, *wrapbmp;
static int wrapbmp_base;
static int wrapbmp_line_spacing;
static int wrapbmp_gap;
static int wrapbmp_bgcolor;
static int wrapbmp_just;
static int wrapbmp_rhmax;
static int wrapbmp_thmax;
static int wrapbmp_maxgap = 2;
static int wrapbmp_height_extended;
static HYPHENINFO wrapbmp_hyphen;
void wrapbmp_init(void)
{
wrapbmp = &_wrapbmp;
bmp_init(wrapbmp);
wrapbmp_set_color(dst_color);
wrapbmp->width = 0;
wrapbmp->height = 0;
wrapbmp_base = 0;
wrapbmp_line_spacing = -1;
wrapbmp_gap = -1;
wrapbmp_bgcolor = -1;
wrapbmp_height_extended = 0;
wrapbmp_just = 0x8f;
wrapbmp_rhmax = -1;
wrapbmp_thmax = -1;
wrapbmp_hyphen.ch = -1;
just_flushed_internal = 0;
beginning_gap_internal = -1;
last_h5050_internal = -1;
}
static int wrapbmp_ends_in_hyphen(void)
{
return (wrapbmp_hyphen.ch >= 0);
}
static void wrapbmp_set_color(int is_color)
{
if (is_color)
wrapbmp->bpp = 24;
else {
int i;
wrapbmp->bpp = 8;
for (i = 0; i < 256; i++)
wrapbmp->red[i] = wrapbmp->blue[i] = wrapbmp->green[i] = i;
}
}
static void wrapbmp_free(void)
{
bmp_free(wrapbmp);
}
static void wrapbmp_set_maxgap(int value)
{
wrapbmp_maxgap = value;
}
static int wrapbmp_width(void)
{
return (wrapbmp->width);
}
static int wrapbmp_remaining(void)
{
int maxpix, w;
maxpix = max_region_width_inches * src_dpi;
/* Don't include hyphen if wrapbmp ends in a hyphen */
if (wrapbmp_hyphen.ch < 0)
w = wrapbmp->width;
else if (src_left_to_right)
w = wrapbmp_hyphen.c2 + 1;
else
w = wrapbmp->width - wrapbmp_hyphen.c2;
return (maxpix - w);
}
/*
** region = bitmap region to add to line
** gap = horizontal pixel gap between existing region and region being added
** line_spacing = desired spacing between lines of text (pixels)
** rbase = position of baseline in region
** gio = gap if over--gap above top of text if it goes over line_spacing.
*/
// static int bcount=0;
static void wrapbmp_add(BMPREGION *region, int gap, int line_spacing, int rbase,
int gio, int just_flags)
{
WILLUSBITMAP *tmp, _tmp;
int i, rh, th, bw, new_base, h2, bpp, width0;
// static char filename[256];
#ifdef WILLUSDEBUG
printf("@wrapbmp_add %d x %d (w=%d).\n",region->c2-region->c1+1,region->r2-region->r1+1,wrapbmp->width);
#endif
bmpregion_hyphen_detect(region); /* Figure out if what we're adding ends in a hyphen */
if (wrapbmp_ends_in_hyphen())
gap = 0;
wrapbmp_hyphen_erase();
just_flushed_internal = 0; // Reset "just flushed" flag
beginning_gap_internal = -1; // Reset top-of-page or top-of-column gap
last_h5050_internal = -1; // Reset last row font size
if (line_spacing > wrapbmp_line_spacing)
wrapbmp_line_spacing = line_spacing;
if (gio > wrapbmp_gap)
wrapbmp_gap = gio;
wrapbmp_bgcolor = region->bgcolor;
wrapbmp_just = just_flags;
/*
printf(" c1=%d, c2=%d, r1=%d, r2=%d\n",region->c1,region->c2,region->r1,region->r2);
printf(" gap=%d, line_spacing=%d, rbase=%d, gio=%d\n",gap,line_spacing,rbase,gio);
*/
bpp = dst_color ? 3 : 1;
rh = rbase - region->r1 + 1;
if (rh > wrapbmp_rhmax)
wrapbmp_rhmax = rh;
th = rh + (region->r2 - rbase);
if (th > wrapbmp_thmax)
wrapbmp_thmax = th;
/*
{
WILLUSBITMAP *bmp,_bmp;
bmp=&_bmp;
bmp_init(bmp);
bmp->height=region->r2-region->r1+1;
bmp->width=region->c2-region->c1+1;
bmp->bpp=bpp*8;
if (bpp==1)
for (i=0;i<256;i++)
bmp->red[i]=bmp->blue[i]=bmp->green[i]=i;
bmp_alloc(bmp);
bw=bmp_bytewidth(bmp);
memset(bmp_rowptr_from_top(bmp,0),255,bw*bmp->height);
for (i=region->r1;i<=region->r2;i++)
{
unsigned char *d,*s;
d=bmp_rowptr_from_top(bmp,i-region->r1);
s=bmp_rowptr_from_top(dst_color?region->bmp:region->bmp8,i)+bpp*region->c1;
if (i==rbase)
memset(d,0,bw);
else
memcpy(d,s,bw);
}
sprintf(filename,"out%05d.png",bcount++);
bmp_write(bmp,filename,stdout,100);
bmp_free(bmp);
}
*/
if (wrapbmp->width == 0) {
/* Put appropriate gap in */
if (last_rowbase_internal >= 0
&& rh < wrapbmp_line_spacing - last_rowbase_internal) {
rh = wrapbmp_line_spacing - last_rowbase_internal;
if (rh < 2)
rh = 2;
th = rh + (region->r2 - rbase);
wrapbmp_height_extended = 0;
} else
wrapbmp_height_extended = (last_rowbase_internal >= 0);
wrapbmp_base = rh - 1;
wrapbmp->height = th;
#ifdef WILLUSDEBUG
printf("@wrapbmp_add: bmpheight set to %d (wls=%d, lrbi=%d)\n",wrapbmp->height,wrapbmp_line_spacing,last_rowbase_internal);
#endif
wrapbmp->width = region->c2 - region->c1 + 1;
bmp_alloc(wrapbmp);
bw = bmp_bytewidth(wrapbmp);
memset(bmp_rowptr_from_top(wrapbmp, 0), 255, bw * wrapbmp->height);
for (i = region->r1; i <= region->r2; i++) {
unsigned char *d, *s;
d = bmp_rowptr_from_top(wrapbmp, wrapbmp_base + (i - rbase));
s = bmp_rowptr_from_top(dst_color ? region->bmp : region->bmp8, i)
+ bpp * region->c1;
memcpy(d, s, bw);
}
#ifdef WILLUSDEBUG
if (wrapbmp->height<=wrapbmp_base)
{
printf("1. SCREEECH!\n");
printf("wrapbmp = %d x %d, base=%d\n",wrapbmp->width,wrapbmp->height,wrapbmp_base);
exit(10);
}
#endif
/* Copy hyphen info from added region */
wrapbmp_hyphen = region->hyphen;
if (wrapbmp_ends_in_hyphen()) {
wrapbmp_hyphen.r1 += (wrapbmp_base - rbase);
wrapbmp_hyphen.r2 += (wrapbmp_base - rbase);
wrapbmp_hyphen.ch -= region->c1;
wrapbmp_hyphen.c2 -= region->c1;
}
return;
}
width0 = wrapbmp->width; /* Starting wrapbmp width */
tmp = &_tmp;
bmp_init(tmp);
bmp_copy(tmp, wrapbmp);
tmp->width += gap + region->c2 - region->c1 + 1;
if (rh > wrapbmp_base) {
wrapbmp_height_extended = 1;
new_base = rh - 1;
} else
new_base = wrapbmp_base;
if (region->r2 - rbase > wrapbmp->height - 1 - wrapbmp_base)
h2 = region->r2 - rbase;
else
h2 = wrapbmp->height - 1 - wrapbmp_base;
tmp->height = new_base + h2 + 1;
bmp_alloc(tmp);
bw = bmp_bytewidth(tmp);
memset(bmp_rowptr_from_top(tmp, 0), 255, bw * tmp->height);
bw = bmp_bytewidth(wrapbmp);
/*
printf("3. wbh=%d x %d, tmp=%d x %d x %d, new_base=%d, wbbase=%d\n",wrapbmp->width,wrapbmp->height,tmp->width,tmp->height,tmp->bpp,new_base,wrapbmp_base);
*/
for (i = 0; i < wrapbmp->height; i++) {
unsigned char *d, *s;
d = bmp_rowptr_from_top(tmp, i + new_base - wrapbmp_base)
+ (src_left_to_right ? 0 : tmp->width - 1 - wrapbmp->width)
* bpp;
s = bmp_rowptr_from_top(wrapbmp, i);
memcpy(d, s, bw);
}
bw = bpp * (region->c2 - region->c1 + 1);
if (region->r1 + new_base - rbase < 0
|| region->r2 + new_base - rbase > tmp->height - 1) {
aprintf(ANSI_YELLOW "INTERNAL ERROR--TMP NOT DIMENSIONED PROPERLY.\n");
aprintf("(%d-%d), tmp->height=%d\n" ANSI_NORMAL,
region->r1 + new_base - rbase, region->r2 + new_base - rbase,
tmp->height);
exit(10);
}
for (i = region->r1; i <= region->r2; i++) {
unsigned char *d, *s;
d = bmp_rowptr_from_top(tmp, i + new_base - rbase)
+ (src_left_to_right ? wrapbmp->width + gap : 0) * bpp;
s = bmp_rowptr_from_top(dst_color ? region->bmp : region->bmp8, i)
+ bpp * region->c1;
memcpy(d, s, bw);
}
bmp_copy(wrapbmp, tmp);
bmp_free(tmp);
/* Copy region's hyphen info */
wrapbmp_hyphen = region->hyphen;
if (wrapbmp_ends_in_hyphen()) {
wrapbmp_hyphen.r1 += (new_base - rbase);
wrapbmp_hyphen.r2 += (new_base - rbase);
if (src_left_to_right) {
wrapbmp_hyphen.ch += width0 + gap - region->c1;
wrapbmp_hyphen.c2 += width0 + gap - region->c1;
} else {
wrapbmp_hyphen.ch -= region->c1;
wrapbmp_hyphen.c2 -= region->c1;
}
}
wrapbmp_base = new_base;
#ifdef WILLUSDEBUG
if (wrapbmp->height<=wrapbmp_base)
{
printf("2. SCREEECH!\n");
printf("wrapbmp = %d x %d, base=%d\n",wrapbmp->width,wrapbmp->height,wrapbmp_base);
exit(10);
}
#endif
}
static void wrapbmp_flush(MASTERINFO *masterinfo, int allow_full_justification,
PAGEINFO *pageinfo, int use_bgi)
{
BMPREGION region;
WILLUSBITMAP *bmp8, _bmp8;
int gap, just, nomss, dh;
int *colcount, *rowcount;
static char *funcname = "wrapbmp_flush";
// char filename[256];
if (wrapbmp->width <= 0) {
if (use_bgi == 1 && beginning_gap_internal > 0)
dst_add_gap_src_pixels("wrapbmp_bgi0", masterinfo,
beginning_gap_internal);
beginning_gap_internal = -1;
last_h5050_internal = -1;
if (use_bgi)
just_flushed_internal = 1;
return;
}
#ifdef WILLUSDEBUG
printf("@wrapbmp_flush()\n");
#endif
/*
{
char filename[256];
int i;
static int bcount=0;
for (i=0;i<wrapbmp->height;i++)
{
unsigned char *p;
int j;
p=bmp_rowptr_from_top(wrapbmp,i);
for (j=0;j<wrapbmp->width;j++)
if (p[j]>240)
p[j]=192;
}
sprintf(filename,"out%05d.png",bcount++);
bmp_write(wrapbmp,filename,stdout,100);
}
*/
colcount = rowcount = NULL;
willus_dmem_alloc_warn(19, (void **) &colcount,
(wrapbmp->width + 16) * sizeof(int), funcname, 10);
willus_dmem_alloc_warn(20, (void **) &rowcount,
(wrapbmp->height + 16) * sizeof(int), funcname, 10);
region.c1 = 0;
region.c2 = wrapbmp->width - 1;
region.r1 = 0;
region.r2 = wrapbmp->height - 1;
region.rowbase = wrapbmp_base;
region.bmp = wrapbmp;
region.bgcolor = wrapbmp_bgcolor;
#ifdef WILLUSDEBUG
printf("Bitmap is %d x %d (baseline=%d)\n",wrapbmp->width,wrapbmp->height,wrapbmp_base);
#endif
/* Sanity check on row spacing -- don't let it be too large. */
nomss = wrapbmp_rhmax * 1.7; /* Nominal single-spaced height for this row */
if (last_rowbase_internal < 0)
dh = 0;
else {
dh = (int) (wrapbmp_line_spacing - last_rowbase_internal
- 1.2 * fabs(vertical_line_spacing) * nomss + .5);
if (vertical_line_spacing < 0.) {
int dh1;
if (wrapbmp_maxgap > 0)
dh1 = region.rowbase + 1 - wrapbmp_rhmax - wrapbmp_maxgap;
else
dh1 = (int) (wrapbmp_line_spacing - last_rowbase_internal
- 1.2 * nomss + .5);
if (dh1 > dh)
dh = dh1;
}
}
if (dh > 0) {
#ifdef WILLUSDEBUG
aprintf(ANSI_YELLOW "dh > 0 = %d" ANSI_NORMAL "\n",dh);
printf(" wrapbmp_line_spacing=%d\n",wrapbmp_line_spacing);
printf(" nomss = %d\n",nomss);
printf(" vls = %g\n",vertical_line_spacing);
printf(" lrbi=%d\n",last_rowbase_internal);
printf(" wrapbmp_maxgap=%d\n",wrapbmp_maxgap);
printf(" wrapbmp_rhmax=%d\n",wrapbmp_rhmax);
#endif
region.r1 = dh;
/*
if (dh>200)
{
bmp_write(wrapbmp,"out.png",stdout,100);
exit(10);
}
*/
}
if (wrapbmp->bpp == 24) {
bmp8 = &_bmp8;
bmp_init(bmp8);
bmp_convert_to_greyscale_ex(bmp8, wrapbmp);
region.bmp8 = bmp8;
} else
region.bmp8 = wrapbmp;
if (gap_override_internal > 0) {
region.r1 = wrapbmp_base - wrapbmp_rhmax + 1;
if (region.r1 < 0)
region.r1 = 0;
if (region.r1 > wrapbmp_base)
region.r1 = wrapbmp_base;
gap = gap_override_internal;
gap_override_internal = -1;
} else {
if (wrapbmp_height_extended)
gap = wrapbmp_gap;
else
gap = 0;
}
#ifdef WILLUSDEBUG
printf("wf: gap=%d\n",gap);
#endif
if (gap > 0)
dst_add_gap_src_pixels("wrapbmp", masterinfo, gap);
if (!allow_full_justification)
just = (wrapbmp_just & 0xcf) | 0x20;
else
just = wrapbmp_just;
bmpregion_add(&region, NULL, masterinfo, 0, 0, 0, -1.0, just, 2, colcount,
rowcount, pageinfo, 0xf, wrapbmp->height - 1 - wrapbmp_base);
if (wrapbmp->bpp == 24)
bmp_free(bmp8);
willus_dmem_free(20, (double **) &rowcount, funcname);
willus_dmem_free(19, (double **) &colcount, funcname);
wrapbmp->width = 0;
wrapbmp->height = 0;
wrapbmp_line_spacing = -1;
wrapbmp_gap = -1;
wrapbmp_rhmax = -1;
wrapbmp_thmax = -1;
wrapbmp_hyphen.ch = -1;
if (use_bgi == 1 && beginning_gap_internal > 0)
dst_add_gap_src_pixels("wrapbmp_bgi1", masterinfo,
beginning_gap_internal);
beginning_gap_internal = -1;
last_h5050_internal = -1;
if (use_bgi)
just_flushed_internal = 1;
}
static void wrapbmp_hyphen_erase(void)
{
WILLUSBITMAP *bmp, _bmp;
int bw, bpp, c0, c1, c2, i;
if (wrapbmp_hyphen.ch < 0)
return;
#if (WILLUSDEBUGX & 16)
printf("@hyphen_erase, bmp=%d x %d x %d\n",wrapbmp->width,wrapbmp->height,wrapbmp->bpp);
printf(" ch=%d, c2=%d, r1=%d, r2=%d\n",wrapbmp_hyphen.ch,wrapbmp_hyphen.c2,wrapbmp_hyphen.r1,wrapbmp_hyphen.r2);
#endif
bmp = &_bmp;
bmp_init(bmp);
bmp->bpp = wrapbmp->bpp;
if (bmp->bpp == 8)
for (i = 0; i < 256; i++)
bmp->red[i] = bmp->blue[i] = bmp->green[i] = i;
bmp->height = wrapbmp->height;
if (src_left_to_right) {
bmp->width = wrapbmp_hyphen.c2 + 1;
c0 = 0;
c1 = wrapbmp_hyphen.ch;
c2 = bmp->width - 1;
} else {
bmp->width = wrapbmp->width - wrapbmp_hyphen.c2;
c0 = wrapbmp_hyphen.c2;
c1 = 0;
c2 = wrapbmp_hyphen.ch - wrapbmp_hyphen.c2;
}
bmp_alloc(bmp);
bpp = bmp->bpp == 24 ? 3 : 1;
bw = bpp * bmp->width;
for (i = 0; i < bmp->height; i++)
memcpy(bmp_rowptr_from_top(bmp, i),
bmp_rowptr_from_top(wrapbmp, i) + bpp * c0, bw);
bw = (c2 - c1 + 1) * bpp;
if (bw > 0)
for (i = wrapbmp_hyphen.r1; i <= wrapbmp_hyphen.r2; i++)
memset(bmp_rowptr_from_top(bmp, i) + bpp * c1, 255, bw);
#if (WILLUSDEBUGX & 16)
{
static int count=1;
char filename[256];
sprintf(filename,"be%04d.png",count);
bmp_write(wrapbmp,filename,stdout,100);
sprintf(filename,"ae%04d.png",count);
bmp_write(bmp,filename,stdout,100);
count++;
}
#endif
bmp_copy(wrapbmp, bmp);
bmp_free(bmp);
}
/*
** src is only allocated if dst_color != 0
*/
static void white_margins(WILLUSBITMAP *src, WILLUSBITMAP *srcgrey)
{
int i, n;
BMPREGION *region, _region;
region = &_region;
region->bmp = srcgrey;
get_white_margins(region);
n = region->c1;
for (i = 0; i < srcgrey->height; i++) {
unsigned char *p;
if (dst_color) {
p = bmp_rowptr_from_top(src, i);
memset(p, 255, n * 3);
}
p = bmp_rowptr_from_top(srcgrey, i);
memset(p, 255, n);
}
n = srcgrey->width - 1 - region->c2;
for (i = 0; i < srcgrey->height; i++) {
unsigned char *p;
if (dst_color) {
p = bmp_rowptr_from_top(src, i) + 3 * (src->width - n);
memset(p, 255, n * 3);
}
p = bmp_rowptr_from_top(srcgrey, i) + srcgrey->width - n;
memset(p, 255, n);
}
n = region->r1;
for (i = 0; i < n; i++) {
unsigned char *p;
if (dst_color) {
p = bmp_rowptr_from_top(src, i);
memset(p, 255, src->width * 3);
}
p = bmp_rowptr_from_top(srcgrey, i);
memset(p, 255, srcgrey->width);
}
n = srcgrey->height - 1 - region->r2;
for (i = srcgrey->height - n; i < srcgrey->height; i++) {
unsigned char *p;
if (dst_color) {
p = bmp_rowptr_from_top(src, i);
memset(p, 255, src->width * 3);
}
p = bmp_rowptr_from_top(srcgrey, i);
memset(p, 255, srcgrey->width);
}
}
static void get_white_margins(BMPREGION *region)
{
int n;
double defval;
defval = 0.25;
if (mar_left < 0.)
mar_left = defval;
n = (int) (0.5 + mar_left * src_dpi);
if (n > region->bmp->width)
n = region->bmp->width;
region->c1 = n;
if (mar_right < 0.)
mar_right = defval;
n = (int) (0.5 + mar_right * src_dpi);
if (n > region->bmp->width)
n = region->bmp->width;
region->c2 = region->bmp->width - 1 - n;
if (mar_top < 0.)
mar_top = defval;
n = (int) (0.5 + mar_top * src_dpi);
if (n > region->bmp->height)
n = region->bmp->height;
region->r1 = n;
if (mar_bot < 0.)
mar_bot = defval;
n = (int) (0.5 + mar_bot * src_dpi);
if (n > region->bmp->height)
n = region->bmp->height;
region->r2 = region->bmp->height - 1 - n;
}
/*
** bitmap_orientation()
**
** 1.0 means neutral
**
** >> 1.0 means document is likely portrait (no rotation necessary)
** (max is 100.)
**
** << 1.0 means document is likely landscape (need to rotate it)
** (min is 0.01)
**
*/
static double bitmap_orientation(WILLUSBITMAP *bmp)
{
int i, ic, wtcalc;
double hsum, vsum, rat;
wtcalc = -1;
for (vsum = 0., hsum = 0., ic = 0, i = 20; i <= 85; i += 5, ic++) {
double nv, nh;
int wth, wtv;
#ifdef DEBUG
printf("h %d:\n",i);
#endif
if (ic == 0)
wth = -1;
else
wth = wtcalc;
wth = -1;
nh = bmp_inflections_horizontal(bmp, 8, i, &wth);
#ifdef DEBUG
{
FILE *f;
f=fopen("inf.ep","a");
fprintf(f,"/ag\n");
fclose(f);
}
printf("v %d:\n",i);
#endif
if (ic == 0)
wtv = -1;
else
wtv = wtcalc;
wtv = -1;
nv = bmp_inflections_vertical(bmp, 8, i, &wtv);
if (ic == 0) {
if (wtv > wth)
wtcalc = wtv;
else
wtcalc = wth;
continue;
}
// exit(10);
hsum += nh * i * i * i;
vsum += nv * i * i * i;
}
if (vsum == 0. && hsum == 0.)
rat = 1.0;
else if (hsum < vsum && hsum / vsum < .01)
rat = 100.;
else
rat = vsum / hsum;
if (rat < .01)
rat = .01;
// printf(" page %2d: %8.4f\n",pagenum,rat);
// fprintf(out,"\t%8.4f",vsum/hsum);
// fprintf(out,"\n");
return (rat);
}
static double bmp_inflections_vertical(WILLUSBITMAP *srcgrey, int ndivisions,
int delta, int *wthresh)
{
int y0, y1, ny, i, nw, nisum, ni, wt, wtmax;
double *g;
char *funcname = "bmp_inflections_vertical";
nw = srcgrey->width / ndivisions;
y0 = srcgrey->height / 6;
y1 = srcgrey->height - y0;
ny = y1 - y0;
willus_dmem_alloc_warn(21, (void **) &g, ny * sizeof(double), funcname, 10);
wtmax = -1;
for (nisum = 0, i = 0; i < 10; i++) {
int x0, x1, nx, j;
x0 = (srcgrey->width - nw) * (i + 2) / 13;
x1 = x0 + nw;
if (x1 > srcgrey->width)
x1 = srcgrey->width;
nx = x1 - x0;
for (j = y0; j < y1; j++) {
int k, rsum;
unsigned char *p;
p = bmp_rowptr_from_top(srcgrey, j) + x0;
for (rsum = k = 0; k < nx; k++, p++)
rsum += p[0];
g[j - y0] = (double) rsum / nx;
}
wt = (*wthresh);
ni = inflection_count(g, ny, delta, &wt);
if ((*wthresh) < 0 && ni >= 3 && wt > wtmax)
wtmax = wt;
if (ni > nisum)
nisum = ni;
}
willus_dmem_free(21, &g, funcname);
if ((*wthresh) < 0)
(*wthresh) = wtmax;
return (nisum);
}
static double bmp_inflections_horizontal(WILLUSBITMAP *srcgrey, int ndivisions,
int delta, int *wthresh)
{
int x0, x1, nx, bw, i, nh, nisum, ni, wt, wtmax;
double *g;
char *funcname = "bmp_inflections_vertical";
nh = srcgrey->height / ndivisions;
x0 = srcgrey->width / 6;
x1 = srcgrey->width - x0;
nx = x1 - x0;
bw = bmp_bytewidth(srcgrey);
willus_dmem_alloc_warn(22, (void **) &g, nx * sizeof(double), funcname, 10);
wtmax = -1;
for (nisum = 0, i = 0; i < 10; i++) {
int y0, y1, ny, j;
y0 = (srcgrey->height - nh) * (i + 2) / 13;
y1 = y0 + nh;
if (y1 > srcgrey->height)
y1 = srcgrey->height;
ny = y1 - y0;
for (j = x0; j < x1; j++) {
int k, rsum;
unsigned char *p;
p = bmp_rowptr_from_top(srcgrey, y0) + j;
for (rsum = k = 0; k < ny; k++, p += bw)
rsum += p[0];
g[j - x0] = (double) rsum / ny;
}
wt = (*wthresh);
ni = inflection_count(g, nx, delta, &wt);
if ((*wthresh) < 0 && ni >= 3 && wt > wtmax)
wtmax = wt;
if (ni > nisum)
nisum = ni;
}
willus_dmem_free(22, &g, funcname);
if ((*wthresh) < 0)
(*wthresh) = wtmax;
return (nisum);
}
static int inflection_count(double *x, int n, int delta, int *wthresh)
{
int i, i0, ni, ww, c, ct, wt, mode;
double meandi, meandisq, f1, f2, stdev;
double *xs;
static int hist[256];
static char *funcname = "inflection_count";
/* Find threshold white value that peaks must exceed */
if ((*wthresh) < 0) {
for (i = 0; i < 256; i++)
hist[i] = 0;
for (i = 0; i < n; i++) {
i0 = floor(x[i]);
if (i0 > 255)
i0 = 255;
hist[i0]++;
}
ct = n * .15;
for (c = 0, i = 255; i >= 0; i--) {
c += hist[i];
if (c > ct)
break;
}
wt = i - 10;
if (wt < 192)
wt = 192;
#ifdef DEBUG
printf("wt=%d\n",wt);
#endif
(*wthresh) = wt;
} else
wt = (*wthresh);
ww = n / 150;
if (ww < 1)
ww = 1;
willus_dmem_alloc_warn(23, (void **) &xs, sizeof(double) * n, funcname, 10);
for (i = 0; i < n - ww; i++) {
int j;
for (xs[i] = 0., j = 0; j < ww; j++, xs[i] += x[i + j])
;
xs[i] /= ww;
}
meandi = meandisq = 0.;
if (xs[0] <= wt - delta)
mode = 1;
else if (xs[0] >= wt)
mode = -1;
else
mode = 0;
for (i0 = 0, ni = 0, i = 1; i < n - ww; i++) {
if (mode == 1 && xs[i] >= wt) {
if (i0 > 0) {
meandi += i - i0;
meandisq += (i - i0) * (i - i0);
ni++;
}
i0 = i;
mode = -1;
continue;
}
if (xs[i] <= wt - delta)
mode = 1;
}
stdev = 1.0; /* Avoid compiler warning */
if (ni > 0) {
meandi /= ni;
meandisq /= ni;
stdev = sqrt(fabs(meandi * meandi - meandisq));
}
f1 = meandi / n;
if (f1 > .15)
f1 = .15;
if (ni > 2) {
if (stdev / meandi < .05)
f2 = 20.;
else
f2 = meandi / stdev;
} else
f2 = 1.;
#ifdef DEBUG
printf(" ni=%3d, f1=%8.4f, f2=%8.4f, f1*f2*ni=%8.4f\n",ni,f1,f2,f1*f2*ni);
{
static int count=0;
FILE *f;
int i;
f=fopen("inf.ep",count==0?"w":"a");
count++;
fprintf(f,"/sa l \"%d\" 1\n",ni);
for (i=0;i<n-ww;i++)
fprintf(f,"%g\n",xs[i]);
fprintf(f,"//nc\n");
fclose(f);
}
#endif /* DEBUG */
willus_dmem_free(23, &xs, funcname);
return (f1 * f2 * ni);
}
static void pdfboxes_init(PDFBOXES *boxes)
{
boxes->n = boxes->na = 0;
boxes->box = NULL;
}
static void pdfboxes_free(PDFBOXES *boxes)
{
static char *funcname = "pdfboxes_free";
willus_dmem_free(24, (double **) &boxes->box, funcname);
}
#ifdef COMMENT
static void pdfboxes_add_box(PDFBOXES *boxes,PDFBOX *box)
{
static char *funcname="pdfboxes_add_box";
if (boxes->n>=boxes->na)
{
int newsize;
newsize = boxes->na < 1024 ? 2048 : boxes->na*2;
/* Just calls willus_mem_alloc if oldsize==0 */
willus_mem_realloc_robust_warn((void **)&boxes->box,newsize*sizeof(PDFBOX),
boxes->na*sizeof(PDFBOX),funcname,10);
boxes->na=newsize;
}
boxes->box[boxes->n++]=(*box);
}
static void pdfboxes_delete(PDFBOXES *boxes,int n)
{
if (n>0 && n<boxes->n)
{
int i;
for (i=0;i<boxes->n-n;i++)
boxes->box[i]=boxes->box[i+n];
}
boxes->n -= n;
if (boxes->n < 0)
boxes->n = 0;
}
#endif
/*
** Track gaps between words so that we can tell when one is out of family.
** lcheight = height of a lowercase letter.
*/
static void word_gaps_add(BREAKINFO *breakinfo, int lcheight,
double *median_gap)
{
static int nn = 0;
static double gap[1024];
static char *funcname = "word_gaps_add";
if (breakinfo != NULL && breakinfo->n > 1) {
int i;
for (i = 0; i < breakinfo->n - 1; i++) {
double g;
g = (double) breakinfo->textrow[i].gap / lcheight;
if (g >= word_spacing) {
gap[nn & 0x3ff] = g;
nn++;
}
}
}
if (median_gap != NULL) {
if (nn > 0) {
int n;
static double *gap_sorted;
n = (nn > 1024) ? 1024 : nn;
willus_dmem_alloc_warn(28, (void **) &gap_sorted,
sizeof(double) * n, funcname, 10);
memcpy(gap_sorted, gap, n * sizeof(double));
sortd(gap_sorted, n);
(*median_gap) = gap_sorted[n / 2];
willus_dmem_free(28, &gap_sorted, funcname);
} else
(*median_gap) = 0.7;
}
}
/*
** bmp must be grayscale! (cbmp = color, can be null)
*/
static void bmp_detect_vertical_lines(WILLUSBITMAP *bmp, WILLUSBITMAP *cbmp,
double dpi, double minwidth_in, double maxwidth_in, double minheight_in,
double anglemax_deg, int white_thresh)
{
int tc, iangle, irow, icol;
int rowstep, na, angle_sign, ccthresh;
int pixmin, halfwidth, bytewidth;
int bs1, nrsteps, dp;
double anglestep;
WILLUSBITMAP *tmp, _tmp;
unsigned char *p0;
if (debug)
printf("At bmp_detect_vertical_lines...\n");
if (!bmp_is_grayscale(bmp)) {
printf(
"Internal error. bmp_detect_vertical_lines passed a non-grayscale bitmap.\n");
exit(10);
}
tmp = &_tmp;
bmp_init(tmp);
bmp_copy(tmp, bmp);
dp = bmp_rowptr_from_top(tmp, 0) - bmp_rowptr_from_top(bmp, 0);
bytewidth = bmp_bytewidth(bmp);
pixmin = (int) (minwidth_in * dpi + .5);
if (pixmin < 1)
pixmin = 1;
halfwidth = pixmin / 4;
if (halfwidth < 1)
halfwidth = 1;
anglestep = atan2((double) halfwidth / dpi, minheight_in);
na = (int) ((anglemax_deg * PI / 180.) / anglestep + .5);
if (na < 1)
na = 1;
rowstep = (int) (dpi / 40. + .5);
if (rowstep < 2)
rowstep = 2;
nrsteps = bmp->height / rowstep;
bs1 = bytewidth * rowstep;
ccthresh = (int) (minheight_in * dpi / rowstep + .5);
if (ccthresh < 2)
ccthresh = 2;
if (debug && verbose)
printf(
" na = %d, rowstep = %d, ccthresh = %d, white_thresh = %d, nrsteps=%d\n",
na, rowstep, ccthresh, white_thresh, nrsteps);
/*
bmp_write(bmp,"out.png",stdout,97);
wfile_written_info("out.png",stdout);
*/
p0 = bmp_rowptr_from_top(bmp, 0);
for (tc = 0; tc < 100; tc++) {
int ccmax, ic0max, ir0max;
double tanthmax;
ccmax = -1;
ic0max = ir0max = 0;
tanthmax = 0.;
for (iangle = 0; iangle <= na; iangle++) {
for (angle_sign = 1; angle_sign >= -1; angle_sign -= 2) {
double th, tanth, tanthx;
int ic1, ic2;
if (iangle == 0 && angle_sign == -1)
continue;
th = (PI / 180.) * iangle * angle_sign * fabs(anglemax_deg)
/ na;
tanth = tan(th);
tanthx = tanth * rowstep;
if (angle_sign == 1) {
ic1 = -(int) (bmp->height * tanth + 1.);
ic2 = bmp->width - 1;
} else {
ic1 = (int) (-bmp->height * tanth + 1.);
ic2 = bmp->width - 1 + (int) (-bmp->height * tanth + 1.);
}
// printf("iangle=%2d, angle_sign=%2d, ic1=%4d, ic2=%4d\n",iangle,angle_sign,ic1,ic2);
for (icol = ic1; icol <= ic2; icol++) {
unsigned char *p;
int cc, ic0, ir0;
p = p0;
if (icol < 0 || icol > bmp->width - 1)
for (irow = 0; irow < nrsteps; irow++, p += bs1) {
int ic;
ic = icol + irow * tanthx;
if (ic >= 0 && ic < bmp->width)
break;
}
else
irow = 0;
for (ir0 = ic0 = cc = 0; irow < nrsteps; irow++, p += bs1) {
int ic;
ic = icol + irow * tanthx;
if (ic < 0 || ic >= bmp->width)
break;
if ((p[ic] < white_thresh
|| p[ic + bytewidth] < white_thresh)
&& (p[ic + dp] < white_thresh
|| p[ic + bytewidth + dp] < white_thresh)) {
if (cc == 0) {
ic0 = ic;
ir0 = irow * rowstep;
}
cc++;
if (cc > ccmax) {
ccmax = cc;
tanthmax = tanth;
ic0max = ic0;
ir0max = ir0;
}
} else
cc = 0;
}
}
}
}
if (ccmax < ccthresh)
break;
if (debug)
printf(
" Vert line detected: ccmax=%d (pix=%d), tanthmax=%g, ic0max=%d, ir0max=%d\n",
ccmax, ccmax * rowstep, tanthmax, ic0max, ir0max);
if (!vert_line_erase(bmp, cbmp, tmp, ir0max, ic0max, tanthmax,
minheight_in, minwidth_in, maxwidth_in, white_thresh))
break;
}
/*
bmp_write(tmp,"outt.png",stdout,95);
wfile_written_info("outt.png",stdout);
bmp_write(bmp,"out2.png",stdout,95);
wfile_written_info("out2.png",stdout);
exit(10);
*/
}
/*
** Calculate max vert line length. Line is terminated by nw consecutive white pixels
** on either side.
*/
static int vert_line_erase(WILLUSBITMAP *bmp, WILLUSBITMAP *cbmp,
WILLUSBITMAP *tmp, int row0, int col0, double tanth,
double minheight_in, double minwidth_in, double maxwidth_in,
int white_thresh)
{
int lw, cc, maxdev, nw, dir, i, n;
int *c1, *c2, *w;
static char *funcname = "vert_line_erase";
willus_dmem_alloc_warn(26, (void **) &c1, sizeof(int) * 3 * bmp->height,
funcname, 10);
c2 = &c1[bmp->height];
w = &c2[bmp->height];
/*
maxdev = (int)((double)bmp->height / minheight_in +.5);
if (maxdev < 3)
maxdev=3;
*/
nw = (int) ((double) src_dpi / 100. + .5);
if (nw < 2)
nw = 2;
maxdev = nw;
for (i = 0; i < bmp->height; i++)
c1[i] = c2[i] = -1;
n = 0;
for (dir = -1; dir <= 1; dir += 2) {
int del, brc;
brc = 0;
for (del = (dir == -1) ? 0 : 1; 1; del++) {
int r, c;
unsigned char *p;
r = row0 + dir * del;
if (r < 0 || r > bmp->height - 1)
break;
c = col0 + (r - row0) * tanth;
if (c < 0 || c > bmp->width - 1)
break;
p = bmp_rowptr_from_top(bmp, r);
for (i = c; i <= c + maxdev && i < bmp->width; i++)
if (p[i] < white_thresh)
break;
if (i > c + maxdev || i >= bmp->width) {
for (i = c - 1; i >= c - maxdev && i >= 0; i--)
if (p[i] < white_thresh)
break;
if (i < c - maxdev || i < 0) {
brc++;
if (brc >= nw)
break;
continue;
}
}
brc = 0;
for (c = i, cc = 0; i < bmp->width; i++)
if (p[i] < white_thresh)
cc = 0;
else {
cc++;
if (cc >= nw)
break;
}
c2[r] = i - cc;
if (c2[r] > bmp->width - 1)
c2[r] = bmp->width - 1;
for (cc = 0, i = c; i >= 0; i--)
if (p[i] < white_thresh)
cc = 0;
else {
cc++;
if (cc >= nw)
break;
}
c1[r] = i + cc;
if (c1[r] < 0)
c1[r] = 0;
w[n++] = c2[r] - c1[r] + 1;
c1[r] -= cc;
if (c1[r] < 0)
c1[r] = 0;
c2[r] += cc;
if (c2[r] > bmp->width - 1)
c2[r] = bmp->width - 1;
}
}
if (n > 1)
sorti(w, n);
if (n < 10 || n < minheight_in * src_dpi || w[n / 4] < minwidth_in * src_dpi
|| w[3 * n / 4] > maxwidth_in * src_dpi
|| (erase_vertical_lines == 1 && w[n - 1] > maxwidth_in * src_dpi)) {
/* Erase area in temp bitmap */
for (i = 0; i < bmp->height; i++) {
unsigned char *p;
int cmax;
if (c1[i] < 0 || c2[i] < 0)
continue;
cmax = (c2[i] - c1[i]) + 1;
p = bmp_rowptr_from_top(tmp, i) + c1[i];
for (; cmax > 0; cmax--, p++)
(*p) = 255;
}
} else {
/* Erase line width in source bitmap */
lw = w[3 * n / 4] + nw * 2;
if (lw > maxwidth_in * src_dpi / 2)
lw = maxwidth_in * src_dpi / 2;
for (i = 0; i < bmp->height; i++) {
unsigned char *p;
int c0, cmin, cmax, count, white;
if (c1[i] < 0 || c2[i] < 0)
continue;
c0 = col0 + (i - row0) * tanth;
cmin = c0 - lw - 1;
if (cmin < c1[i])
cmin = c1[i];
cmax = c0 + lw + 1;
if (cmax > c2[i])
cmax = c2[i];
p = bmp_rowptr_from_top(bmp, i);
c0 = (p[cmin] > p[cmax]) ? cmin : cmax;
white = p[c0];
if (white <= white_thresh)
white = white_thresh + 1;
if (white > 255)
white = 255;
count = (cmax - cmin) + 1;
p = &p[cmin];
for (; count > 0; count--, p++)
(*p) = white;
if (cbmp != NULL) {
unsigned char *p0;
p = bmp_rowptr_from_top(cbmp, i);
p0 = p + c0 * 3;
p = p + cmin * 3;
count = (cmax - cmin) + 1;
for (; count > 0; count--, p += 3) {
p[0] = p0[0];
p[1] = p0[1];
p[2] = p0[2];
}
}
}
}
willus_dmem_free(26, (double **) &c1, funcname);
return (1);
}
/*
** mem_index... controls which memory allocactions get a protective margin
** around them.
*/
static int mem_index_min = 999;
static int mem_index_max = 999;
static void willus_dmem_alloc_warn(int index, void **ptr, int size,
char *funcname, int exitcode)
{
if (index >= mem_index_min && index <= mem_index_max) {
char *ptr1;
void *x;
willus_mem_alloc_warn((void **) &ptr1, size + 2048, funcname, exitcode);
ptr1 += 1024;
x = (void *) ptr1;
(*ptr) = x;
} else
willus_mem_alloc_warn(ptr, size, funcname, exitcode);
}
static void willus_dmem_free(int index, double **ptr, char *funcname)
{
if ((*ptr) == NULL)
return;
if (index >= mem_index_min && index <= mem_index_max) {
double *x;
char *ptr1;
x = (*ptr);
ptr1 = (char *) x;
ptr1 -= 1024;
x = (double *) ptr1;
willus_mem_free(&x, funcname);
(*ptr) = NULL;
} else
willus_mem_free(ptr, funcname);
}
/* mem.c */
/*
** The reason I don't simply use malloc is because I want to allocate
** memory using type long instead of type size_t. On some compilers,
** like gcc, these are the same, so it doesn't matter. On other
** compilers, like Turbo C, these are different.
**
*/
static int willus_mem_alloc(double **ptr,long size,char *name)
{
#if (defined(WIN32) && !defined(__DMC__))
unsigned long memsize;
memsize = (unsigned long)size;
#ifdef USEGLOBAL
(*ptr) = (memsize==size) ? (double *)GlobalAlloc(GPTR,memsize) : NULL;
#else
(*ptr) = (memsize==size) ? (double *)CoTaskMemAlloc(memsize) : NULL;
#endif
#else
size_t memsize;
memsize=(size_t)size;
(*ptr) = (memsize==size) ? (double *)malloc(memsize) : NULL;
#endif
/*
{
f=fopen("mem.dat","a");
fprintf(f,"willus_mem_alloc(%d,%s)\n",size,name);
fclose(f);
}
*/
return((*ptr)!=NULL);
}
/*
** Prints an integer to 's' with commas separating every three digits.
** E.g. 45,399,350
** Correctly handles negative values.
*/
static void comma_print(char *s,long size)
{
int i,m,neg;
char tbuf[80];
if (!size)
{
s[0]='0';
s[1]='\0';
return;
}
s[0]='\0';
neg=0;
if (size<0)
{
size=-size;
neg=1;
}
for (i=0,m=size%1000;size;i++,size=(size-m)/1000,m=size%1000)
{
sprintf(tbuf,m==size ? "%d%s":"%03d%s",m,i>0 ? "," : "");
strcat(tbuf,s);
strcpy(s,tbuf);
}
if (neg)
{
strcpy(tbuf,"-");
strcat(tbuf,s);
strcpy(s,tbuf);
}
}
static void mem_warn(char *name,int size,int exitcode)
{
static char buf[128];
aprintf("\n" ANSI_RED "\aCannot allocate enough memory for "
"function %s." ANSI_NORMAL "\n",name);
comma_print(buf,size);
aprintf(" " ANSI_RED "(Needed %s bytes.)" ANSI_NORMAL "\n\n",buf);
if (exitcode!=0)
{
aprintf(" " ANSI_RED "Program terminated." ANSI_NORMAL "\n\n");
exit(exitcode);
}
}
static int willus_mem_alloc_warn(void **ptr, int size, char *name, int exitcode)
{
int status;
status = willus_mem_alloc((double **) ptr, (long) size, name);
if (!status)
mem_warn(name, size, exitcode);
return (status);
}
static void willus_mem_free(double **ptr, char *name)
{
if ((*ptr) != NULL) {
#if (defined(WIN32) && !defined(__DMC__))
#ifdef USEGLOBAL
GlobalFree((void *)(*ptr));
#else
CoTaskMemFree((void *)(*ptr));
#endif
#else
free((void *) (*ptr));
#endif
(*ptr) = NULL;
}
}
static int willus_mem_realloc_robust(double **ptr,long newsize,long oldsize,char *name)
{
#if (defined(WIN32) && !defined(__DMC__))
unsigned long memsize;
void *newptr;
#else
size_t memsize;
void *newptr;
#endif
#if (defined(WIN32) && !defined(__DMC__))
memsize=(unsigned long)newsize;
#else
memsize=(size_t)newsize;
#endif
if (memsize!=newsize)
return(0);
if ((*ptr)==NULL || oldsize<=0)
return(willus_mem_alloc(ptr,newsize,name));
#if (defined(WIN32) && !defined(__DMC__))
#ifdef USEGLOBAL
newptr = (void *)GlobalReAlloc((void *)(*ptr),memsize,GMEM_MOVEABLE);
#else
newptr = (void *)CoTaskMemRealloc((void *)(*ptr),memsize);
#endif
#else
newptr = realloc((void *)(*ptr),memsize);
#endif
if (newptr==NULL && willus_mem_alloc((double **)&newptr,newsize,name))
{
memcpy(newptr,(*ptr),oldsize);
willus_mem_free(ptr,name);
}
if (newptr==NULL)
return(0);
(*ptr) = newptr;
return(1);
}
static int willus_mem_realloc_robust_warn(void **ptr,int newsize,int oldsize,char *name,
int exitcode)
{
int status;
status = willus_mem_realloc_robust((double **)ptr,newsize,oldsize,name);
if (!status)
mem_warn(name,newsize,exitcode);
return(status);
}
/* math.c */
static void sortd(double *x, int n)
{
int top, n1;
double x0;
if (n < 2)
return;
top = n / 2;
n1 = n - 1;
while (1) {
if (top > 0) {
top--;
x0 = x[top];
} else {
x0 = x[n1];
x[n1] = x[0];
n1--;
if (!n1) {
x[0] = x0;
return;
}
}
{
int parent, child;
parent = top;
child = top * 2 + 1;
while (child <= n1) {
if (child < n1 && x[child] < x[child + 1])
child++;
if (x0 < x[child]) {
x[parent] = x[child];
parent = child;
child += (parent + 1);
} else
break;
}
x[parent] = x0;
}
}
}
static void sorti(int *x, int n)
{
int top, n1;
int x0;
if (n < 2)
return;
top = n / 2;
n1 = n - 1;
while (1) {
if (top > 0) {
top--;
x0 = x[top];
} else {
x0 = x[n1];
x[n1] = x[0];
n1--;
if (!n1) {
x[0] = x0;
return;
}
}
{
int parent, child;
parent = top;
child = top * 2 + 1;
while (child <= n1) {
if (child < n1 && x[child] < x[child + 1])
child++;
if (x0 < x[child]) {
x[parent] = x[child];
parent = child;
child += (parent + 1);
} else
break;
}
x[parent] = x0;
}
}
}
/* bmp.c */
/*
** Should call bmp_set_type() right after this to set the bitmap type.
*/
#define RGBSET24(bmp,ptr,r,g,b) \
if (bmp->type==WILLUSBITMAP_TYPE_NATIVE) \
{ \
ptr[0]=r; \
ptr[1]=g; \
ptr[2]=b; \
} \
else \
{ \
ptr[2]=r; \
ptr[1]=g; \
ptr[0]=b; \
}
#define RGBGET(bmp,ptr,r,g,b) \
if (bmp->bpp==8) \
{ \
r=bmp->red[ptr[0]]; \
g=bmp->green[ptr[0]]; \
b=bmp->blue[ptr[0]]; \
} \
else if (bmp->type==WILLUSBITMAP_TYPE_NATIVE) \
{ \
r=ptr[0]; \
g=ptr[1]; \
b=ptr[2]; \
} \
else \
{ \
r=ptr[2]; \
g=ptr[1]; \
b=ptr[0]; \
}
#define RGBGETINCPTR(bmp,ptr,r,g,b) \
if (bmp->bpp==8) \
{ \
r=bmp->red[ptr[0]]; \
g=bmp->green[ptr[0]]; \
b=bmp->blue[ptr[0]]; \
ptr++; \
} \
else if (bmp->type==WILLUSBITMAP_TYPE_NATIVE) \
{ \
r=ptr[0]; \
g=ptr[1]; \
b=ptr[2]; \
ptr+=3; \
} \
else \
{ \
r=ptr[2]; \
g=ptr[1]; \
b=ptr[0]; \
ptr+=3; \
}
static void bmp_init(WILLUSBITMAP *bmap)
{
bmap->data = NULL;
bmap->size_allocated = 0;
bmap->type = WILLUSBITMAP_TYPE_NATIVE;
}
static int bmp_bytewidth_win32(WILLUSBITMAP *bmp)
{
return(((bmp->bpp==24 ? bmp->width*3 : bmp->width)+3)&(~0x3));
}
/*
** The width, height, and bpp parameters of the WILLUSBITMAP structure
** should be set before calling this function.
*/
static int bmp_alloc(WILLUSBITMAP *bmap)
{
int size;
static char *funcname = "bmp_alloc";
if (bmap->bpp != 8 && bmap->bpp != 24) {
printf("Internal error: call to bmp_alloc has bpp!=8 and bpp!=24!\n");
exit(10);
}
/* Choose the max size even if not WIN32 to avoid memory faults */
/* and to allow the possibility of changing the "type" of the */
/* bitmap without reallocating memory. */
size = bmp_bytewidth_win32(bmap) * bmap->height;
if (bmap->data != NULL && bmap->size_allocated >= size)
return (1);
if (bmap->data != NULL)
willus_mem_realloc_robust_warn((void **) &bmap->data, size,
bmap->size_allocated, funcname, 10);
else
willus_mem_alloc_warn((void **) &bmap->data, size, funcname, 10);
bmap->size_allocated = size;
return (1);
}
static void bmp_free(WILLUSBITMAP *bmap)
{
if (bmap->data!=NULL)
{
willus_mem_free((double **)&bmap->data,"bmp_free");
bmap->data=NULL;
bmap->size_allocated=0;
}
}
/*
** If 8-bit, the bitmap is filled with <r>.
** If 24-bit, it gets <r>, <g>, <b> values.
*/
static void bmp_fill(WILLUSBITMAP *bmp,int r,int g,int b)
{
int y,n;
if (bmp->bpp==8 || (r==g && r==b))
{
memset(bmp->data,r,bmp->size_allocated);
return;
}
if (bmp->type==WILLUSBITMAP_TYPE_WIN32 && bmp->bpp==24)
{
y=r;
r=b;
b=y;
}
for (y=bmp->height-1;y>=0;y--)
{
unsigned char *p;
p=bmp_rowptr_from_top(bmp,y);
for (n=bmp->width-1;n>=0;n--)
{
(*p)=r;
p++;
(*p)=g;
p++;
(*p)=b;
p++;
}
}
}
static int bmp_copy(WILLUSBITMAP *dest, WILLUSBITMAP *src)
{
dest->width = src->width;
dest->height = src->height;
dest->bpp = src->bpp;
dest->type = src->type;
if (!bmp_alloc(dest))
return (0);
memcpy(dest->data, src->data, src->height * bmp_bytewidth(src));
memcpy(dest->red, src->red, sizeof(int) * 256);
memcpy(dest->green, src->green, sizeof(int) * 256);
memcpy(dest->blue, src->blue, sizeof(int) * 256);
return (1);
}
static int bmp_bytewidth(WILLUSBITMAP *bmp) {
return (bmp->bpp == 24 ? bmp->width * 3 : bmp->width);
}
/*
** row==0 ==> top row of bitmap
** row==bmp->height-1 ==> bottom row of bitmap
** (regardless of bitmap type)
*/
static unsigned char *bmp_rowptr_from_top(WILLUSBITMAP *bmp, int row)
{
if (bmp->type == WILLUSBITMAP_TYPE_WIN32)
return (&bmp->data[bmp_bytewidth(bmp) * (bmp->height - 1 - row)]);
else
return (&bmp->data[bmp_bytewidth(bmp) * row]);
}
/*
** Allocate more bitmap rows.
** ratio typically something like 1.5 or 2.0
*/
static void bmp_more_rows(WILLUSBITMAP *bmp, double ratio, int pixval)
{
int new_height, new_bytes, bw;
static char *funcname = "bmp_more_rows";
new_height = (int) (bmp->height * ratio + .5);
if (new_height <= bmp->height)
return;
bw = bmp_bytewidth(bmp);
new_bytes = bw * new_height;
if (new_bytes > bmp->size_allocated) {
willus_mem_realloc_robust_warn((void **) &bmp->data, new_bytes,
bmp->size_allocated, funcname, 10);
bmp->size_allocated = new_bytes;
}
/* Fill in */
memset(bmp_rowptr_from_top(bmp, bmp->height), pixval,
(new_height - bmp->height) * bw);
bmp->height = new_height;
}
static double resample_single(double *y,double x1,double x2)
{
int i,i1,i2;
double dx,dx1,dx2,sum;
i1=floor(x1);
i2=floor(x2);
if (i1==i2)
return(y[i1]);
dx=x2-x1;
if (dx>1.)
dx=1.;
dx1= 1.-(x1-i1);
dx2= x2-i2;
sum=0.;
if (dx1 > 1e-8*dx)
sum += dx1*y[i1];
if (dx2 > 1e-8*dx)
sum += dx2*y[i2];
for (i=i1+1;i<=i2-1;sum+=y[i],i++);
return(sum/(x2-x1));
}
/*
** Resample src[] into dst[].
** Examples: resample_1d(dst,src,0.,5.,5) would simply copy the
** first five elements of src[] to dst[].
**
** resample_1d(dst,src,0.,5.,10) would work as follows:
** dst[0] and dst[1] would get src[0].
** dst[2] and dst[3] would get src[1].
** and so on.
**
*/
static void resample_1d(double *dst,double *src,double x1,double x2,
int n)
{
int i;
double new,last;
last=x1;
for (i=0;i<n;i++)
{
new=x1+(x2-x1)*(i+1)/n;
dst[i] = resample_single(src,last,new);
last=new;
}
}
static void bmp_resample_1(double *tempbmp,WILLUSBITMAP *src,double x1,double y1,
double x2,double y2,int newwidth,int newheight,
double *temprow,int color)
{
int row,col,x0,dx,y0,dy;
x0=floor(x1);
dx=ceil(x2)-x0;
x1-=x0;
x2-=x0;
y0=floor(y1);
dy=ceil(y2)-y0;
y1-=y0;
y2-=y0;
if (src->type==WILLUSBITMAP_TYPE_WIN32 && color>=0)
color=2-color;
for (row=0;row<dy;row++)
{
unsigned char *p;
p=bmp_rowptr_from_top(src,row+y0);
if (src->bpp==8)
{
switch (color)
{
case -1:
for (col=0,p+=x0;col<dx;col++,p++)
temprow[col]=p[0];
break;
case 0:
for (col=0,p+=x0;col<dx;col++,p++)
temprow[col]=src->red[p[0]];
break;
case 1:
for (col=0,p+=x0;col<dx;col++,p++)
temprow[col]=src->green[p[0]];
break;
case 2:
for (col=0,p+=x0;col<dx;col++,p++)
temprow[col]=src->blue[p[0]];
break;
}
}
else
{
p+=color;
for (col=0,p+=3*x0;col<dx;temprow[col]=p[0],col++,p+=3);
}
resample_1d(&tempbmp[row*newwidth],temprow,x1,x2,newwidth);
}
for (col=0;col<newwidth;col++)
{
double *p,*s;
p=&tempbmp[col];
s=&temprow[dy];
for (row=0;row<dy;row++,p+=newwidth)
temprow[row]=p[0];
resample_1d(s,temprow,y1,y2,newheight);
p=&tempbmp[col];
for (row=0;row<newheight;row++,p+=newwidth,s++)
p[0]=s[0];
}
}
/*
** Resample (re-size) bitmap. The pixel positions left to right go from
** 0.0 to src->width (x-coord), and top to bottom go from
** 0.0 to src->height (y-coord).
** The cropped rectangle (x1,y1) to (x2,y2) is placed into
** the destination bitmap, which need not be allocated yet.
**
** The destination bitmap will be 8-bit grayscale if the source bitmap
** passes the bmp_is_grayscale() function. Otherwise it will be 24-bit.
**
** Returns 0 for okay.
** -1 for not enough memory.
** -2 for bad cropping area or destination bitmap size
*/
static int bmp_resample(WILLUSBITMAP *dest, WILLUSBITMAP *src, double x1,
double y1, double x2, double y2, int newwidth, int newheight)
{
int gray, maxlen, colorplanes;
double t;
double *tempbmp;
double *temprow;
int color, hmax, row, col, dy;
static char *funcname = "bmp_resample";
/* Clip and sort x1,y1 and x2,y2 */
if (x1 > src->width)
x1 = src->width;
else if (x1 < 0.)
x1 = 0.;
if (x2 > src->width)
x2 = src->width;
else if (x2 < 0.)
x2 = 0.;
if (y1 > src->height)
y1 = src->height;
else if (y1 < 0.)
y1 = 0.;
if (y2 > src->height)
y2 = src->height;
else if (y2 < 0.)
y2 = 0.;
if (x2 < x1) {
t = x2;
x2 = x1;
x1 = t;
}
if (y2 < y1) {
t = y2;
y2 = y1;
y1 = t;
}
dy = y2 - y1;
dy += 2;
if (x2 - x1 == 0. || y2 - y1 == 0.)
return (-2);
/* Allocate temp storage */
maxlen = x2 - x1 > dy + newheight ? (int) (x2 - x1) : dy + newheight;
maxlen += 16;
hmax = newheight > dy ? newheight : dy;
if (!willus_mem_alloc(&temprow, maxlen * sizeof(double), funcname))
return (-1);
if (!willus_mem_alloc(&tempbmp, hmax * newwidth * sizeof(double),
funcname)) {
willus_mem_free(&temprow, funcname);
return (-1);
}
if ((gray = bmp_is_grayscale(src)) != 0) {
int i;
dest->bpp = 8;
for (i = 0; i < 256; i++)
dest->red[i] = dest->blue[i] = dest->green[i] = i;
} else
dest->bpp = 24;
dest->width = newwidth;
dest->height = newheight;
dest->type = WILLUSBITMAP_TYPE_NATIVE;
if (!bmp_alloc(dest)) {
willus_mem_free(&tempbmp, funcname);
willus_mem_free(&temprow, funcname);
return (-1);
}
colorplanes = gray ? 1 : 3;
for (color = 0; color < colorplanes; color++) {
bmp_resample_1(tempbmp, src, x1, y1, x2, y2, newwidth, newheight,
temprow, gray ? -1 : color);
for (row = 0; row < newheight; row++) {
unsigned char *p;
double *s;
p = bmp_rowptr_from_top(dest, row) + color;
s = &tempbmp[row * newwidth];
if (colorplanes == 1)
for (col = 0; col < newwidth;
p[0] = (int) (s[0] + .5), col++, s++, p++)
;
else
for (col = 0; col < newwidth;
p[0] = (int) (s[0] + .5), col++, s++, p += colorplanes)
;
}
}
willus_mem_free(&tempbmp, funcname);
willus_mem_free(&temprow, funcname);
return (0);
}
static int bmp8_greylevel_convert(int r,int g,int b)
{
return((int)((r*0.3+g*0.59+b*0.11)*1.002));
}
/*
** One of dest or src can be NULL, which is the
** same as setting them equal to each other, but
** in this case, the bitmap must be 24-bit!
*/
static int bmp_is_grayscale(WILLUSBITMAP *bmp)
{
int i;
if (bmp->bpp!=8)
return(0);
for (i=0;i<256;i++)
if (bmp->red[i]!=i || bmp->green[i]!=i || bmp->blue[i]!=i)
return(0);
return(1);
}
static void bmp_color_xform8(WILLUSBITMAP *dest,WILLUSBITMAP *src,unsigned char *newval)
{
int i,ir;
if (src==NULL)
src=dest;
if (dest==NULL)
dest=src;
if (dest!=src)
{
dest->width = src->width;
dest->height = src->height;
dest->bpp = 8;
for (i=0;i<256;i++)
dest->red[i]=dest->green[i]=dest->blue[i]=i;
bmp_alloc(dest);
}
for (ir=0;ir<src->height;ir++)
{
unsigned char *sp,*dp;
sp=bmp_rowptr_from_top(src,ir);
dp=bmp_rowptr_from_top(dest,ir);
for (i=0;i<src->width;i++)
dp[i]=newval[sp[i]];
}
}
/*
** One of dest or src can be NULL, which is the
** same as setting them equal to each other, but
** in this case, the bitmap must be 24-bit!
*/
static void bmp_color_xform(WILLUSBITMAP *dest,WILLUSBITMAP *src,unsigned char *newval)
{
int ir,ic;
if (src==NULL)
src=dest;
if (dest==NULL)
dest=src;
if (bmp_is_grayscale(src))
{
bmp_color_xform8(dest,src,newval);
return;
}
if (dest!=src)
{
dest->width = src->width;
dest->height = src->height;
dest->bpp = 24;
bmp_alloc(dest);
}
for (ir=0;ir<src->height;ir++)
{
unsigned char *sp,*dp;
sp=bmp_rowptr_from_top(src,ir);
dp=bmp_rowptr_from_top(dest,ir);
for (ic=0;ic<src->width;ic++,dp+=3)
{
int r,g,b;
RGBGETINCPTR(src,sp,r,g,b);
r=newval[r];
g=newval[g];
b=newval[b];
RGBSET24(dest,dp,r,g,b);
}
}
}
/*
** One of dest or src can be NULL, which is the
** same as setting them equal to each other, but
** in this case, the bitmap must be 24-bit!
** Note: contrast > 1 will increase the contrast.
** contrast < 1 will decrease the contrast.
** contrast of 0 will make all pixels the same value.
** contrast of 1 will not change the image.
*/
static void bmp_contrast_adjust(WILLUSBITMAP *dest,WILLUSBITMAP *src,double contrast)
{
int i;
static unsigned char newval[256];
for (i=0;i<256;i++)
{
double x,y;
int sgn,v;
x=(i-127.5)/127.5;
sgn = x<0 ? -1 : 1;
if (contrast<0)
sgn = -sgn;
x=fabs(x);
if (fabs(contrast)>1.5)
y=x<.99999 ? 1-exp(fabs(contrast)*x/(x-1)) : 1.;
else
{
y=fabs(contrast)*x;
if (y>1.)
y=1.;
}
y = 127.5+y*sgn*127.5;
v = (int)(y+.5);
if (v<0)
v=0;
if (v>255)
v=255;
newval[i] = v;
}
bmp_color_xform(dest,src,newval);
}
/*
** Convert bitmap to grey-scale in-situ
*/
static void bmp_convert_to_greyscale_ex(WILLUSBITMAP *dst, WILLUSBITMAP *src)
{
int oldbpr, newbpr, bpp, dp, rownum, colnum, i;
oldbpr = bmp_bytewidth(src);
dp = src->bpp == 8 ? 1 : 3;
bpp = src->bpp;
dst->bpp = 8;
for (i = 0; i < 256; i++)
dst->red[i] = dst->green[i] = dst->blue[i] = i;
if (dst != src) {
dst->width = src->width;
dst->height = src->height;
bmp_alloc(dst);
}
newbpr = bmp_bytewidth(dst);
/* Possibly restore src->bpp to 24 so RGBGET works right (src & dst may be the same) */
src->bpp = bpp;
for (rownum = 0; rownum < src->height; rownum++) {
unsigned char *oldp, *newp;
oldp = &src->data[oldbpr * rownum];
newp = &dst->data[newbpr * rownum];
for (colnum = 0; colnum < src->width; colnum++, oldp += dp, newp++) {
int r, g, b;
RGBGET(src, oldp, r, g, b);
(*newp) = bmp8_greylevel_convert(r, g, b);
}
}
dst->bpp = 8; /* Possibly restore dst->bpp to 8 since src & dst may be the same. */
}
/* bmpmupdf.c */
static int bmpmupdf_pixmap_to_bmp(WILLUSBITMAP *bmp, fz_context *ctx,
fz_pixmap *pixmap)
{
unsigned char *p;
int ncomp, i, row, col;
bmp->width = fz_pixmap_width(ctx, pixmap);
bmp->height = fz_pixmap_height(ctx, pixmap);
ncomp = fz_pixmap_components(ctx, pixmap);
/* Has to be 8-bit or RGB */
if (ncomp != 2 && ncomp != 4)
return (-1);
bmp->bpp = (ncomp == 2) ? 8 : 24;
bmp_alloc(bmp);
if (ncomp == 2)
for (i = 0; i < 256; i++)
bmp->red[i] = bmp->green[i] = bmp->blue[i] = i;
p = fz_pixmap_samples(ctx, pixmap);
if (ncomp == 1)
for (row = 0; row < bmp->height; row++) {
unsigned char *dest;
dest = bmp_rowptr_from_top(bmp, row);
memcpy(dest, p, bmp->width);
p += bmp->width;
}
else if (ncomp == 2)
for (row = 0; row < bmp->height; row++) {
unsigned char *dest;
dest = bmp_rowptr_from_top(bmp, row);
for (col = 0; col < bmp->width; col++, dest++, p += 2)
dest[0] = p[0];
}
else
for (row = 0; row < bmp->height; row++) {
unsigned char *dest;
dest = bmp_rowptr_from_top(bmp, row);
for (col = 0; col < bmp->width;
col++, dest += ncomp - 1, p += ncomp)
memcpy(dest, p, ncomp - 1);
}
return (0);
}
static void handle(int wait, ddjvu_context_t *ctx)
{
const ddjvu_message_t *msg;
if (!ctx)
return;
if (wait)
msg = ddjvu_message_wait(ctx);
while ((msg = ddjvu_message_peek(ctx)))
{
switch(msg->m_any.tag)
{
case DDJVU_ERROR:
fprintf(stderr,"ddjvu: %s\n", msg->m_error.message);
if (msg->m_error.filename)
fprintf(stderr,"ddjvu: '%s:%d'\n",
msg->m_error.filename, msg->m_error.lineno);
exit(10);
default:
break;
}
}
ddjvu_message_pop(ctx);
}