ruby-changes:7166
From: akr <ko1@a...>
Date: Mon, 18 Aug 2008 12:16:08 +0900 (JST)
Subject: [ruby-changes:7166] Ruby:r18685 (trunk): * io.c (io_shift_crbuf): add strp argument to append into existing string.
akr 2008-08-18 12:13:53 +0900 (Mon, 18 Aug 2008) New Revision: 18685 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18685 Log: * io.c (io_shift_crbuf): add strp argument to append into existing string. (read_all): use econv if enc2 is set. (io_getc): follow the io_shift_crbuf change. Modified files: trunk/ChangeLog trunk/io.c trunk/test/ruby/test_io_m17n.rb Index: ChangeLog =================================================================== --- ChangeLog (revision 18684) +++ ChangeLog (revision 18685) @@ -1,3 +1,10 @@ +Mon Aug 18 12:12:29 2008 Tanaka Akira <akr@f...> + + * io.c (io_shift_crbuf): add strp argument to append into existing + string. + (read_all): use econv if enc2 is set. + (io_getc): follow the io_shift_crbuf change. + Mon Aug 18 10:35:25 2008 Tanaka Akira <akr@f...> * io.c (io_enc_str_converted): new function. Index: io.c =================================================================== --- io.c (revision 18684) +++ io.c (revision 18685) @@ -1406,15 +1406,137 @@ return str; } +static void +make_readconv(rb_io_t *fptr) +{ + if (!fptr->readconv) { + fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0); + if (!fptr->readconv) + rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc->name, fptr->enc2->name); + fptr->crbuf_off = 0; + fptr->crbuf_len = 0; + fptr->crbuf_capa = 1024; + fptr->crbuf = ALLOC_N(char, fptr->crbuf_capa); + } +} + +static int +more_char(rb_io_t *fptr) +{ + const unsigned char *ss, *sp, *se; + unsigned char *ds, *dp, *de; + rb_econv_result_t res; + int putbackable; + int crbuf_len0; + + if (fptr->crbuf_len == fptr->crbuf_capa) + return 0; /* crbuf full */ + if (fptr->crbuf_len == 0) + fptr->crbuf_off = 0; + else if (fptr->crbuf_off + fptr->crbuf_len == fptr->crbuf_capa) { + memmove(fptr->crbuf, fptr->crbuf+fptr->crbuf_off, fptr->crbuf_len); + fptr->crbuf_off = 0; + } + + crbuf_len0 = fptr->crbuf_len; + + while (1) { + ss = sp = (const unsigned char *)fptr->rbuf + fptr->rbuf_off; + 
se = sp + fptr->rbuf_len; + ds = dp = (unsigned char *)fptr->crbuf + fptr->crbuf_off + fptr->crbuf_len; + de = (unsigned char *)fptr->crbuf + fptr->crbuf_capa; + res = rb_econv_convert(fptr->readconv, &sp, se, &dp, de, ECONV_PARTIAL_INPUT|ECONV_OUTPUT_FOLLOWED_BY_INPUT); + fptr->rbuf_off += sp - ss; + fptr->rbuf_len -= sp - ss; + fptr->crbuf_len += dp - ds; + + putbackable = rb_econv_putbackable(fptr->readconv); + if (putbackable) { + rb_econv_putback(fptr->readconv, (unsigned char *)fptr->rbuf + fptr->rbuf_off - putbackable, putbackable); + fptr->rbuf_off -= putbackable; + fptr->rbuf_len += putbackable; + } + + rb_econv_check_error(fptr->readconv); + + if (crbuf_len0 != fptr->crbuf_len) + return 0; + + if (res == econv_finished) + return -1; + + if (res == econv_source_buffer_empty) { + if (fptr->rbuf_len == 0) { + rb_thread_wait_fd(fptr->fd); + rb_io_check_closed(fptr); + if (io_fillbuf(fptr) == -1) { + ds = dp = (unsigned char *)fptr->crbuf + fptr->crbuf_off + fptr->crbuf_len; + de = (unsigned char *)fptr->crbuf + fptr->crbuf_capa; + res = rb_econv_convert(fptr->readconv, NULL, NULL, &dp, de, 0); + fptr->crbuf_len += dp - ds; + rb_econv_check_error(fptr->readconv); + } + } + } + } +} + static VALUE +io_shift_crbuf(rb_io_t *fptr, int len, VALUE *strp) +{ + VALUE str; + if (NIL_P(*strp)) { + *strp = str = rb_str_new(fptr->crbuf+fptr->crbuf_off, len); + } + else { + size_t slen; + str = *strp; + slen = RSTRING_LEN(str); + rb_str_resize(str, RSTRING_LEN(str) + len); + memcpy(RSTRING_PTR(str)+slen, fptr->crbuf+fptr->crbuf_off, len); + } + fptr->crbuf_off += len; + fptr->crbuf_len -= len; + OBJ_TAINT(str); + rb_enc_associate(str, fptr->enc); + /* xxx: set coderange */ + if (fptr->crbuf_len == 0) + fptr->crbuf_off = 0; + if (fptr->crbuf_off < fptr->crbuf_capa/2) { + memmove(fptr->crbuf, fptr->crbuf+fptr->crbuf_off, fptr->crbuf_len); + fptr->crbuf_off = 0; + } + return str; +} + +static VALUE read_all(rb_io_t *fptr, long siz, VALUE str) { - long bytes = 0; + long bytes; 
long n; - long pos = 0; - rb_encoding *enc = io_read_encoding(fptr); - int cr = fptr->enc2 ? ENC_CODERANGE_BROKEN : 0; + long pos; + rb_encoding *enc; + int cr; + if (fptr->enc2) { + VALUE str = rb_str_new(NULL, 0); + make_readconv(fptr); + while (1) { + if (fptr->crbuf_len) { + io_shift_crbuf(fptr, fptr->crbuf_len, &str); + } + if (more_char(fptr) == -1) { + return io_enc_str_converted(str, fptr); + } + } + } + + bytes = 0; + pos = 0; + + enc = io_read_encoding(fptr); + cr = fptr->enc2 ? ENC_CODERANGE_BROKEN : 0; + if (siz == 0) siz = BUFSIZ; if (NIL_P(str)) { str = rb_str_new(0, siz); @@ -1744,82 +1866,7 @@ rb_raise(rb_eRuntimeError, "rs modified"); } -static void -make_readconv(rb_io_t *fptr) -{ - if (!fptr->readconv) { - fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0); - if (!fptr->readconv) - rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc->name, fptr->enc2->name); - fptr->crbuf_off = 0; - fptr->crbuf_len = 0; - fptr->crbuf_capa = 1024; - fptr->crbuf = ALLOC_N(char, fptr->crbuf_capa); - } -} - static int -more_char(rb_io_t *fptr) -{ - const unsigned char *ss, *sp, *se; - unsigned char *ds, *dp, *de; - rb_econv_result_t res; - int putbackable; - int crbuf_len0; - - if (fptr->crbuf_len == fptr->crbuf_capa) - return 0; /* crbuf full */ - if (fptr->crbuf_len == 0) - fptr->crbuf_off = 0; - else if (fptr->crbuf_off + fptr->crbuf_len == fptr->crbuf_capa) { - memmove(fptr->crbuf, fptr->crbuf+fptr->crbuf_off, fptr->crbuf_len); - fptr->crbuf_off = 0; - } - - crbuf_len0 = fptr->crbuf_len; - - while (1) { - ss = sp = (const unsigned char *)fptr->rbuf + fptr->rbuf_off; - se = sp + fptr->rbuf_len; - ds = dp = (unsigned char *)fptr->crbuf + fptr->crbuf_off + fptr->crbuf_len; - de = (unsigned char *)fptr->crbuf + fptr->crbuf_capa; - res = rb_econv_convert(fptr->readconv, &sp, se, &dp, de, ECONV_PARTIAL_INPUT|ECONV_OUTPUT_FOLLOWED_BY_INPUT); - fptr->rbuf_off += sp - ss; - fptr->rbuf_len -= sp - ss; - fptr->crbuf_len += dp - ds; - - 
putbackable = rb_econv_putbackable(fptr->readconv); - if (putbackable) { - rb_econv_putback(fptr->readconv, (unsigned char *)fptr->rbuf + fptr->rbuf_off - putbackable, putbackable); - fptr->rbuf_off -= putbackable; - fptr->rbuf_len += putbackable; - } - - rb_econv_check_error(fptr->readconv); - - if (crbuf_len0 != fptr->crbuf_len) - return 0; - - if (res == econv_finished) - return -1; - - if (res == econv_source_buffer_empty) { - if (fptr->rbuf_len == 0) { - rb_thread_wait_fd(fptr->fd); - rb_io_check_closed(fptr); - if (io_fillbuf(fptr) == -1) { - ds = dp = (unsigned char *)fptr->crbuf + fptr->crbuf_off + fptr->crbuf_len; - de = (unsigned char *)fptr->crbuf + fptr->crbuf_capa; - res = rb_econv_convert(fptr->readconv, NULL, NULL, &dp, de, 0); - fptr->crbuf_len += dp - ds; - rb_econv_check_error(fptr->readconv); - } - } - } - } -} - -static int appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp) { VALUE str = *strp; @@ -2356,31 +2403,14 @@ } static VALUE -io_shift_crbuf(rb_io_t *fptr, int len) -{ - VALUE str; - str = rb_str_new(fptr->crbuf+fptr->crbuf_off, len); - fptr->crbuf_off += len; - fptr->crbuf_len -= len; - OBJ_TAINT(str); - rb_enc_associate(str, fptr->enc); - /* xxx: set coderange */ - if (fptr->crbuf_len == 0) - fptr->crbuf_off = 0; - if (fptr->crbuf_off < fptr->crbuf_capa/2) { - memmove(fptr->crbuf, fptr->crbuf+fptr->crbuf_off, fptr->crbuf_len); - fptr->crbuf_off = 0; - } - return str; -} - -static VALUE io_getc(rb_io_t *fptr, rb_encoding *enc) { int r, n, cr = 0; VALUE str; if (fptr->enc2) { + VALUE str = Qnil; + if (!fptr->readconv) { make_readconv(fptr); } @@ -2401,16 +2431,16 @@ if (fptr->crbuf_len == 0) return Qnil; /* return an incomplete character just before EOF */ - return io_shift_crbuf(fptr, fptr->crbuf_len); + return io_shift_crbuf(fptr, fptr->crbuf_len, &str); } } if (MBCLEN_INVALID_P(r)) { r = rb_enc_mbclen(fptr->crbuf+fptr->crbuf_off, fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len, fptr->enc); - return io_shift_crbuf(fptr, r); + return 
io_shift_crbuf(fptr, r, &str); } - return io_shift_crbuf(fptr, MBCLEN_CHARFOUND_LEN(r)); + return io_shift_crbuf(fptr, MBCLEN_CHARFOUND_LEN(r), &str); } if (io_fillbuf(fptr) < 0) { Index: test/ruby/test_io_m17n.rb =================================================================== --- test/ruby/test_io_m17n.rb (revision 18684) +++ test/ruby/test_io_m17n.rb (revision 18685) @@ -473,6 +473,77 @@ } end + def test_gets_invalid + with_pipe("utf-8:euc-jp") {|r, w| + before = "\u{3042}\u{3044}" + invalid = "\x80".force_encoding("utf-8") + after = "\u{3046}\u{3048}" + w << before + invalid + after + w.close + err = assert_raise(Encoding::InvalidByteSequence) { r.gets } + assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes) + assert_equal(after.encode("euc-jp"), r.gets) + } + end + + def test_getc_invalid + with_pipe("utf-8:euc-jp") {|r, w| + before1 = "\u{3042}" + before2 = "\u{3044}" + invalid = "\x80".force_encoding("utf-8") + after1 = "\u{3046}" + after2 = "\u{3048}" + w << before1 + before2 + invalid + after1 + after2 + w.close + assert_equal(before1.encode("euc-jp"), r.getc) + assert_equal(before2.encode("euc-jp"), r.getc) + err = assert_raise(Encoding::InvalidByteSequence) { r.getc } + assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes) + assert_equal(after1.encode("euc-jp"), r.getc) + assert_equal(after2.encode("euc-jp"), r.getc) + } + end + + def test_getc_invalid2 + with_pipe("utf-16le:euc-jp") {|r, w| + before1 = "\x42\x30".force_encoding("utf-16le") + before2 = "\x44\x30".force_encoding("utf-16le") + invalid = "\x00\xd8".force_encoding("utf-16le") + after1 = "\x46\x30".force_encoding("utf-16le") + after2 = "\x48\x30".force_encoding("utf-16le") + w << before1 + before2 + invalid + after1 + after2 + w.close + assert_equal(before1.encode("euc-jp"), r.getc) + assert_equal(before2.encode("euc-jp"), r.getc) + err = assert_raise(Encoding::InvalidByteSequence) { r.getc } + assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes) 
+ assert_equal(after1.encode("euc-jp"), r.getc) + assert_equal(after2.encode("euc-jp"), r.getc) + } + end + + def test_read_all + with_pipe("utf-8:euc-jp") {|r, w| + str = "\u3042\u3044" + w << str + w.close + assert_equal(str.encode("euc-jp"), r.read) + } + end + + def test_read_all_invalid + with_pipe("utf-8:euc-jp") {|r, w| + before = "\u{3042}\u{3044}" + invalid = "\x80".force_encoding("utf-8") + after = "\u{3046}\u{3048}" + w << before + invalid + after + w.close + err = assert_raise(Encoding::InvalidByteSequence) { r.read } + assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes) + assert_equal(after.encode("euc-jp"), r.read) + } + end + def test_file_foreach with_tmpdir { generate_file('tst', 'a' * 8191 + "\xa1\xa1") -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/