diff --git a/apps/plugins/viewer.c b/apps/plugins/viewer.c index 2ed0cd8..de97737 100644 --- a/apps/plugins/viewer.c +++ b/apps/plugins/viewer.c @@ -548,6 +548,11 @@ struct viewport vp; struct bookmark_info bookmarks[MAX_BOOKMARKS]; static int bookmark_count; +/* UTF-8 byte order mark (BOM): the three bytes EF BB BF */ +#define BOM "\xef\xbb\xbf" +#define BOM_SIZE 3 + +static bool is_bom = false; /* set by check_bom(): true when the file begins with BOM */ static int glyph_width(int ch) { @@ -830,7 +835,7 @@ static unsigned char* find_prev_line(const unsigned char* cur_line) /* (else return NULL and read previous block) */ /* Wrap downwards until too far, then use the one before. */ - while (p < cur_line && p != NULL) { + while (p != NULL && p < cur_line) { /* test for NULL before comparing the pointers */ prev_line = p; p = find_next_line(prev_line, NULL); } @@ -841,15 +846,34 @@ static unsigned char* find_prev_line(const unsigned char* cur_line) return (unsigned char*) prev_line; } +static void check_bom(void) +{ + unsigned char bom[BOM_SIZE]; + off_t orig = rb->lseek(fd, 0, SEEK_CUR); + /* Set is_bom according to whether fd starts with a UTF-8 BOM; orig is what lseek(fd, 0, SEEK_CUR) reported and is seeked back to at the end */ + is_bom = false; + /* compare the first BOM_SIZE bytes of the file with BOM; a short read leaves is_bom false */ + rb->lseek(fd, 0, SEEK_SET); + + if (rb->read(fd, bom, BOM_SIZE) == BOM_SIZE) + is_bom = !memcmp(bom, BOM, BOM_SIZE); + + rb->lseek(fd, orig, SEEK_SET); +} + static void fill_buffer(long pos, unsigned char* buf, unsigned size) { /* Read from file and preprocess the data */ /* To minimize disk access, always read on sector boundaries */ unsigned numread, i; bool found_CR = false; + off_t offset = rb->lseek(fd, pos, SEEK_SET); + /* when positioned at offset 0 of a UTF-8 file with a BOM, seek past the BOM so it is not read into buf */ + if (offset == 0 && prefs.encoding == UTF_8 && is_bom) + rb->lseek(fd, BOM_SIZE, SEEK_SET); - rb->lseek(fd, pos, SEEK_SET); numread = rb->read(fd, buf, size); + buf[numread] = 0; /* NOTE(review): stores one byte at index numread; assumes buf has room for one byte more than size and that the read did not fail (numread is unsigned), confirm */ rb->button_clear_queue(); /* clear button queue */ for(i = 0; i < numread; i++) { @@ -1099,7 +1123,11 @@ static void viewer_draw(int col) get_next_line_position(&line_begin, &line_end, &line_is_short); if (line_end == NULL) - break; + { + if (BUFFER_OOB(line_begin)) + break; + line_end = buffer_end + 1; /* no line end found and BUFFER_OOB(line_begin) is false: end the line one past buffer_end */ + } line_len = line_end - line_begin; @@ -1461,16 +1489,25 @@ static bool viewer_init(void) if (fd==-1) return 
false; - file_size = rb->filesize(fd); - if (file_size==-1) - return false; - /* Init mac_text value used in processing buffer */ mac_text = false; return true; } +/* Set file_size to the size of fd as reported by rb->filesize(); an error value (-1) is stored unchanged. + * If the file starts with a UTF-8 BOM (is_bom) and prefs.encoding is UTF_8, + * the BOM is not counted: file_size is reduced by BOM_SIZE. */ +static void get_filesize(void) +{ + file_size = rb->filesize(fd); + if (file_size == -1) + return; + + if (prefs.encoding == UTF_8 && is_bom) + file_size -= BOM_SIZE; +} + static int bm_comp(const void *a, const void *b) { struct bookmark_info *pa; @@ -1850,6 +1887,9 @@ read_end: viewer_remove_last_read_bookmark(); + check_bom(); + get_filesize(); + buffer_end = BUFFER_END(); /* Update whenever file_pos changes */ if (BUFFER_OOB(screen_top_ptr)) @@ -2094,10 +2134,16 @@ static void calc_page(void) { if (bookmarks[i].flag & BOOKMARK_LAST) { + int screen_pos; + int screen_top; + /* split the bookmarked file position into a file_pos that is a multiple of buffer_size and an offset into buffer */ + screen_pos = bookmarks[i].file_position; + screen_top = screen_pos % buffer_size; + file_pos = screen_pos - screen_top; + screen_top_ptr = buffer + screen_top; + cpage = bookmarks[i].page; cline = bookmarks[i].line; - file_pos = bookmarks[i].file_position; - screen_top_ptr = buffer; bookmarks[i].flag ^= BOOKMARK_LAST; buffer_end = BUFFER_END(); @@ -2134,6 +2180,8 @@ static bool encoding_setting(void) { static struct opt_items names[NUM_CODEPAGES]; int idx; + bool res; + enum codepages oldenc = prefs.encoding; /* encoding in effect before the option dialog runs */ for (idx = 0; idx < NUM_CODEPAGES; idx++) { @@ -2141,8 +2189,21 @@ static bool encoding_setting(void) names[idx].voice_id = -1; } - return rb->set_option("Encoding", &prefs.encoding, INT, names, + res = rb->set_option("Encoding", &prefs.encoding, INT, names, sizeof(names) / sizeof(names[0]), NULL); + + /* When prefs.encoding changes to UTF-8 or away from UTF-8, + * file_size may change, because get_filesize() excludes the BOM only for UTF-8. + * fill_buffer() likewise skips the BOM only for UTF-8, so the buffer is refilled. 
+ */ + if (oldenc != prefs.encoding && (oldenc == UTF_8 || prefs.encoding == UTF_8)) + { + check_bom(); + get_filesize(); + fill_buffer(file_pos, buffer, buffer_size); + } + + return res; } static bool word_wrap_setting(void)