diff --git a/apps/plugins/viewer.c b/apps/plugins/viewer.c old mode 100644 new mode 100755 index 7e03c55..0f04513 --- a/apps/plugins/viewer.c +++ b/apps/plugins/viewer.c @@ -422,6 +422,9 @@ static unsigned char *next_line_ptr; static struct font *pf; #endif +/* UTF-8 BOM */ +#define BOM "\xef\xbb\xbf" +#define BOM_SIZE 3 int glyph_width(int ch) { @@ -715,9 +718,22 @@ static void fill_buffer(long pos, unsigned char* buf, unsigned size) /* To minimize disk access, always read on sector boundaries */ unsigned numread, i; bool found_CR = false; + off_t offset = rb->lseek(fd, pos, SEEK_SET); + + if (offset == 0 && prefs.encoding == UTF_8) + { + /* check BOM */ + unsigned char bom[BOM_SIZE]; + + if (rb->read(fd, bom, BOM_SIZE) == BOM_SIZE) + { + if (memcmp(bom, BOM, BOM_SIZE)) + rb->lseek(fd, 0, SEEK_SET); + } + } - rb->lseek(fd, pos, SEEK_SET); numread = rb->read(fd, buf, size); + buf[numread] = 0; rb->button_clear_queue(); /* clear button queue */ for(i = 0; i < numread; i++) { @@ -1156,16 +1172,39 @@ static bool viewer_init(void) if (fd==-1) return false; - file_size = rb->filesize(fd); - if (file_size==-1) - return false; - /* Init mac_text value used in processing buffer */ mac_text = false; return true; } +/* When a file is UTF-8 file with BOM, if prefs.encoding is UTF-8, + * then file size decreases only BOM_SIZE. + */ +static void get_filesize(void) +{ + file_size = rb->filesize(fd); + if (file_size==-1) + return; + + if (prefs.encoding == UTF_8) + { + /* check BOM */ + unsigned char bom[BOM_SIZE]; + off_t cur = rb->lseek(fd, 0, SEEK_CUR); + + if (cur != 0) + rb->lseek(fd, 0, SEEK_SET); + + if (rb->read(fd, bom, BOM_SIZE) == BOM_SIZE) + { + if (!memcmp(bom, BOM, BOM_SIZE)) + file_size -= BOM_SIZE; + } + rb->lseek(fd, cur, SEEK_SET); + } +} + static void viewer_default_settings(void) { prefs.word_mode = WRAP; @@ -1202,6 +1241,8 @@ static void viewer_load_settings(void) /* same name as global, but not the same rb->memcpy(&old_prefs, &prefs, sizeof(struct preferences)); + get_filesize(); + data = (struct bookmark_file_data*)buffer; /* grab the text buffer */ data->bookmarked_files_count = 0; @@ -1341,6 +1382,8 @@ static bool encoding_setting(void) { static struct opt_items names[NUM_CODEPAGES]; int idx; + bool res; + enum codepages oldenc = prefs.encoding; for (idx = 0; idx < NUM_CODEPAGES; idx++) { @@ -1348,8 +1391,20 @@ static bool encoding_setting(void) names[idx].voice_id = -1; } - return rb->set_option("Encoding", &prefs.encoding, INT, names, + res = rb->set_option("Encoding", &prefs.encoding, INT, names, sizeof(names) / sizeof(names[0]), NULL); + + /* When prefs.encoding changes into UTF-8 or changes from UTF-8, + * filesize (file_size) might change. + * In addition, if prefs.encoding is UTF-8, then BOM does not read. + */ + if (oldenc != prefs.encoding && (oldenc == UTF_8 || prefs.encoding == UTF_8)) + { + get_filesize(); + fill_buffer(file_pos, buffer, buffer_size); + } + + return res; } static bool word_wrap_setting(void)