From a7d5b0e9ac1ecd8153d5ceeaa796340755ccb692 Mon Sep 17 00:00:00 2001 From: jgirardet Date: Sat, 6 Apr 2019 01:44:00 +0200 Subject: [PATCH 01/19] start --- vm/src/obj/mod.rs | 1 + vm/src/obj/objbyteinner.rs | 9 +++ vm/src/obj/objbytes.rs | 155 ++++++++++++++++++++----------------- 3 files changed, 96 insertions(+), 69 deletions(-) create mode 100644 vm/src/obj/objbyteinner.rs diff --git a/vm/src/obj/mod.rs b/vm/src/obj/mod.rs index 0419d827f0..c824e726d8 100644 --- a/vm/src/obj/mod.rs +++ b/vm/src/obj/mod.rs @@ -3,6 +3,7 @@ pub mod objbool; pub mod objbuiltinfunc; pub mod objbytearray; +pub mod objbyteinner; pub mod objbytes; pub mod objclassmethod; pub mod objcode; diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs new file mode 100644 index 0000000000..0607621f1a --- /dev/null +++ b/vm/src/obj/objbyteinner.rs @@ -0,0 +1,9 @@ +#[derive(Debug, Default, Clone)] +pub struct PyByteInner { + pub elements: Vec, +} +impl PyByteInner { + pub fn new(data: Vec) -> Self { + PyByteInner { elements: data } + } +} diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index f24c9273c5..8f43cd05f6 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -6,22 +6,37 @@ use std::ops::Deref; use num_traits::ToPrimitive; use crate::function::OptionalArg; -use crate::pyobject::{PyContext, PyObjectRef, PyRef, PyResult, PyValue}; +use crate::pyobject::{ + IntoPyObject, PyClassImpl, PyContext, PyObjectRef, PyRef, PyResult, PyValue, +}; use crate::vm::VirtualMachine; +use super::objbyteinner::PyByteInner; use super::objint; use super::objiter; use super::objtype::PyClassRef; +/// "bytes(iterable_of_ints) -> bytes\n\ +/// bytes(string, encoding[, errors]) -> bytes\n\ +/// bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\ +/// bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\ +/// bytes() -> empty bytes object\n\nConstruct an immutable array of bytes from:\n \ +/// - an iterable yielding integers in range(256)\n \ +/// - a text string encoded using the specified encoding\n \ +/// - any object implementing the buffer API.\n \ +/// - an integer"; +#[pyclass(name = "bytes", __inside_vm)] #[derive(Debug)] pub struct PyBytes { - value: Vec, + inner: PyByteInner, } type PyBytesRef = PyRef; impl PyBytes { - pub fn new(data: Vec) -> Self { - PyBytes { value: data } + pub fn new(elements: Vec) -> Self { + PyBytes { + inner: PyByteInner { elements }, + } } } @@ -29,7 +44,7 @@ impl Deref for PyBytes { type Target = [u8]; fn deref(&self) -> &[u8] { - &self.value + &self.inner.elements } } @@ -42,62 +57,74 @@ impl PyValue for PyBytes { // Binary data support // Fill bytes class methods: -pub fn init(context: &PyContext) { - let bytes_doc = - "bytes(iterable_of_ints) -> bytes\n\ - bytes(string, encoding[, errors]) -> bytes\n\ - bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\ - bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\ - bytes() -> empty bytes object\n\nConstruct an immutable array of bytes from:\n \ - - an iterable yielding integers in range(256)\n \ - - a text string encoded using the specified encoding\n \ - - any object implementing the buffer API.\n \ - - an integer"; - - extend_class!(context, &context.bytes_type, { - "__new__" => context.new_rustfunc(bytes_new), - "__eq__" => context.new_rustfunc(PyBytesRef::eq), - "__lt__" => context.new_rustfunc(PyBytesRef::lt), - "__le__" => context.new_rustfunc(PyBytesRef::le), - "__gt__" => context.new_rustfunc(PyBytesRef::gt), - "__ge__" => context.new_rustfunc(PyBytesRef::ge), - "__hash__" => context.new_rustfunc(PyBytesRef::hash), - "__repr__" => context.new_rustfunc(PyBytesRef::repr), - "__len__" => context.new_rustfunc(PyBytesRef::len), - "__iter__" => context.new_rustfunc(PyBytesRef::iter), - "__doc__" => context.new_str(bytes_doc.to_string()) - }); - - let bytesiterator_type = &context.bytesiterator_type; - extend_class!(context, bytesiterator_type, { - "__next__" => context.new_rustfunc(PyBytesIteratorRef::next), - "__iter__" => context.new_rustfunc(PyBytesIteratorRef::iter), - }); -} - -fn bytes_new( - cls: PyClassRef, - val_option: OptionalArg, - vm: &VirtualMachine, -) -> PyResult { - // Create bytes data: - let value = if let OptionalArg::Present(ival) = val_option { - let elements = vm.extract_elements(&ival)?; - let mut data_bytes = vec![]; - for elem in elements.iter() { - let v = objint::to_int(vm, elem, 10)?; - data_bytes.push(v.to_u8().unwrap()); - } - data_bytes - // return Err(vm.new_type_error("Cannot construct bytes".to_string())); - } else { - vec![] - }; - PyBytes::new(value).into_ref_with_type(vm, cls) +pub fn get_value<'a>(obj: &'a PyObjectRef) -> impl Deref> + 'a { + &obj.payload::().unwrap().inner.elements } +// pub fn init(context: &PyContext) { +// let bytes_doc = +pub fn init(ctx: &PyContext) { + PyBytesRef::extend_class(ctx, &ctx.bytes_type); +} +//extend_class!(context, &context.bytes_type, { +//"__new__" => context.new_rustfunc(bytes_new), +/* "__eq__" => context.new_rustfunc(PyBytesRef::eq), +"__lt__" => context.new_rustfunc(PyBytesRef::lt), +"__le__" => context.new_rustfunc(PyBytesRef::le), +"__gt__" => context.new_rustfunc(PyBytesRef::gt), +"__ge__" => context.new_rustfunc(PyBytesRef::ge), +"__hash__" => context.new_rustfunc(PyBytesRef::hash), +"__repr__" => context.new_rustfunc(PyBytesRef::repr), +"__len__" => context.new_rustfunc(PyBytesRef::len), +"__iter__" => context.new_rustfunc(PyBytesRef::iter), +"__doc__" => context.new_str(bytes_doc.to_string())*/ +// }); + +/* let bytesiterator_type = &context.bytesiterator_type; +extend_class!(context, bytesiterator_type, { + "__next__" => context.new_rustfunc(PyBytesIteratorRef::next), + "__iter__" => context.new_rustfunc(PyBytesIteratorRef::iter), +});*/ +//} +#[pyimpl(__inside_vm)] impl PyBytesRef { + #[pymethod(name = "__new__")] + fn bytes_new( + cls: PyClassRef, + val_option: OptionalArg, + vm: &VirtualMachine, + ) -> PyResult { + // Create bytes data: + let value = if let OptionalArg::Present(ival) = val_option { + let elements = vm.extract_elements(&ival)?; + let mut data_bytes = vec![]; + for elem in elements.iter() { + let v = objint::to_int(vm, elem, 10)?; + data_bytes.push(v.to_u8().unwrap()); + } + data_bytes + // return Err(vm.new_type_error("Cannot construct bytes".to_string())); + } else { + vec![] + }; + + PyBytes::new(value).into_ref_with_type(vm, cls) + } + + #[pymethod(name = "__repr__")] + fn repr(self, _vm: &VirtualMachine) -> String { + // TODO: don't just unwrap + let data = self.inner.elements.clone(); + format!("b'{:?}'", data) + } + + #[pymethod(name = "__len__")] + fn len(self, _vm: &VirtualMachine) -> usize { + self.inner.elements.len() + } +} +/* fn eq(self, other: PyObjectRef, vm: &VirtualMachine) -> PyObjectRef { if let Ok(other) = other.downcast::() { vm.ctx.new_bool(self.value == other.value) @@ -138,9 +165,6 @@ impl PyBytesRef { } } - fn len(self, _vm: &VirtualMachine) -> usize { - self.value.len() - } fn hash(self, _vm: &VirtualMachine) -> u64 { let mut hasher = DefaultHasher::new(); @@ -148,11 +172,6 @@ impl PyBytesRef { hasher.finish() } - fn repr(self, _vm: &VirtualMachine) -> String { - // TODO: don't just unwrap - let data = String::from_utf8(self.value.clone()).unwrap(); - format!("b'{}'", data) - } fn iter(self, _vm: &VirtualMachine) -> PyBytesIterator { PyBytesIterator { @@ -162,9 +181,7 @@ impl PyBytesRef { } } -pub fn get_value<'a>(obj: &'a PyObjectRef) -> impl Deref> + 'a { - &obj.payload::().unwrap().value -} + #[derive(Debug)] pub struct PyBytesIterator { @@ -194,4 +211,4 @@ impl PyBytesIteratorRef { fn iter(self, _vm: &VirtualMachine) -> Self { self } -} +}*/ From 10cbf2ae740c0fe7de2151a2e3daf88576c7a394 Mon Sep 17 00:00:00 2001 From: jgirardet Date: Sun, 7 Apr 2019 01:49:16 +0200 Subject: [PATCH 02/19] bytes._new stuff --- vm/src/obj/objbytes.rs | 81 +++++++++++++++++++++++++++++++++++------- 1 file changed, 69 insertions(+), 12 deletions(-) diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index 8f43cd05f6..26de250107 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -1,3 +1,9 @@ +use crate::obj::objint::PyInt; +use crate::obj::objlist; +use crate::obj::objlist::PyList; +use crate::obj::objstr::PyString; +use crate::obj::objtuple::PyTuple; +use crate::obj::objtype; use std::cell::Cell; use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; @@ -7,7 +13,8 @@ use num_traits::ToPrimitive; use crate::function::OptionalArg; use crate::pyobject::{ - IntoPyObject, PyClassImpl, PyContext, PyObjectRef, PyRef, PyResult, PyValue, + IntoPyObject, PyClassImpl, PyContext, PyIterable, PyObjectRef, PyRef, PyResult, PyValue, + TryFromObject, TypeProtocol, }; use crate::vm::VirtualMachine; @@ -87,29 +94,79 @@ extend_class!(context, bytesiterator_type, { "__iter__" => context.new_rustfunc(PyBytesIteratorRef::iter), });*/ //} + +fn load_byte( + elements: PyResult>, + vm: &VirtualMachine, +) -> Result, PyObjectRef> { + if let Ok(value) = elements { + let mut data_bytes = vec![]; + for elem in value.iter() { + let v = objint::to_int(vm, &elem, 10)?; + if let Some(i) = v.to_u8() { + data_bytes.push(i); + } else { + return Err(vm.new_value_error("byte must be in range(0, 256)".to_string())); + } + } + Ok(data_bytes) + } else { + Err(vm.new_value_error("byte must be in range(0, 256)".to_string())) + } +} + #[pyimpl(__inside_vm)] impl PyBytesRef { #[pymethod(name = "__new__")] fn bytes_new( cls: PyClassRef, val_option: OptionalArg, + enc_option: OptionalArg, vm: &VirtualMachine, ) -> PyResult { // Create bytes data: - let value = if let OptionalArg::Present(ival) = val_option { - let elements = vm.extract_elements(&ival)?; - let mut data_bytes = vec![]; - for elem in elements.iter() { - let v = objint::to_int(vm, elem, 10)?; - data_bytes.push(v.to_u8().unwrap()); + if let OptionalArg::Present(enc) = enc_option { + if let OptionalArg::Present(eval) = val_option { + if objtype::isinstance(&eval, &vm.ctx.str_type()) + && objtype::isinstance(&enc, &vm.ctx.str_type()) + { + //return Ok(PyBytes::new(vec![1]).into_ref_with_type(vm, cls.clone())); + return Err(vm.new_type_error("Ok les 2 sont sstr".to_string())); + } else { + return Err(vm.new_type_error(format!( + "bytes() argument 2 must be str, not {}", + enc.class().name + ))); + } + } else { + return Err(vm.new_type_error("encoding without a string argument".to_string())); } - data_bytes - // return Err(vm.new_type_error("Cannot construct bytes".to_string())); + } + + let value = if let OptionalArg::Present(ival) = val_option { + println!("{:?}", ival); + match_class!(ival.clone(), + _i @ PyInt => { + let size = objint::get_value(&ival).to_usize().unwrap(); + let mut res: Vec = Vec::with_capacity(size); + for _ in 0..size { + res.push(0) + } + Ok(res)}, + _j @ PyList => load_byte(vm.extract_elements(&ival), vm).or_else(|x| {return Err(x) }), + _k @ PyTuple => load_byte(vm.extract_elements(&ival), vm).or_else(|x| {return Err(x) }), + _l @ PyString=> load_byte(vm.extract_elements(&ival), vm).or_else(|x| {return Err(x) }), + _obj => {return Err(vm.new_type_error(format!( + "int() argument must be a string or a number, not " + )));} + ) } else { - vec![] + Ok(vec![]) }; - - PyBytes::new(value).into_ref_with_type(vm, cls) + match value { + Ok(val) => PyBytes::new(val).into_ref_with_type(vm, cls.clone()), + Err(err) => Err(err), + } } #[pymethod(name = "__repr__")] From 7e965c7cc8b331c11a339537161aee841b5a6ad0 Mon Sep 17 00:00:00 2001 From: jgirardet Date: Sun, 7 Apr 2019 10:12:57 +0200 Subject: [PATCH 03/19] check encoding in new --- vm/src/obj/objbytes.rs | 71 +++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 29 deletions(-) diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index 26de250107..1b06a96416 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -21,7 +21,9 @@ use crate::vm::VirtualMachine; use super::objbyteinner::PyByteInner; use super::objint; use super::objiter; +use super::objstr; use super::objtype::PyClassRef; +use std::clone::Clone; /// "bytes(iterable_of_ints) -> bytes\n\ /// bytes(string, encoding[, errors]) -> bytes\n\ @@ -125,13 +127,24 @@ impl PyBytesRef { vm: &VirtualMachine, ) -> PyResult { // Create bytes data: + if let OptionalArg::Present(enc) = enc_option { if let OptionalArg::Present(eval) = val_option { - if objtype::isinstance(&eval, &vm.ctx.str_type()) - && objtype::isinstance(&enc, &vm.ctx.str_type()) - { - //return Ok(PyBytes::new(vec![1]).into_ref_with_type(vm, cls.clone())); - return Err(vm.new_type_error("Ok les 2 sont sstr".to_string())); + if let Ok(input) = eval.downcast::() { + if let Ok(encoding) = enc.downcast::() { + if encoding.value.to_lowercase() == "utf8".to_string() + || encoding.value.to_lowercase() == "utf-8".to_string() + { + return PyBytes::new(input.value.as_bytes().to_vec()) + .into_ref_with_type(vm, cls.clone()); + } else { + return Err( + vm.new_value_error(format!("unknown encoding: {}", encoding.value)), //should be lookup error + ); + } + } else { + return Err(vm.new_type_error("encodin is not string".to_string())); + } } else { return Err(vm.new_type_error(format!( "bytes() argument 2 must be str, not {}", @@ -141,31 +154,31 @@ impl PyBytesRef { } else { return Err(vm.new_type_error("encoding without a string argument".to_string())); } - } - - let value = if let OptionalArg::Present(ival) = val_option { - println!("{:?}", ival); - match_class!(ival.clone(), - _i @ PyInt => { - let size = objint::get_value(&ival).to_usize().unwrap(); - let mut res: Vec = Vec::with_capacity(size); - for _ in 0..size { - res.push(0) - } - Ok(res)}, - _j @ PyList => load_byte(vm.extract_elements(&ival), vm).or_else(|x| {return Err(x) }), - _k @ PyTuple => load_byte(vm.extract_elements(&ival), vm).or_else(|x| {return Err(x) }), - _l @ PyString=> load_byte(vm.extract_elements(&ival), vm).or_else(|x| {return Err(x) }), - _obj => {return Err(vm.new_type_error(format!( - "int() argument must be a string or a number, not " - )));} - ) } else { - Ok(vec![]) - }; - match value { - Ok(val) => PyBytes::new(val).into_ref_with_type(vm, cls.clone()), - Err(err) => Err(err), + let value = if let OptionalArg::Present(ival) = val_option { + println!("{:?}", ival); + match_class!(ival.clone(), + _i @ PyInt => { + let size = objint::get_value(&ival).to_usize().unwrap(); + let mut res: Vec = Vec::with_capacity(size); + for _ in 0..size { + res.push(0) + } + Ok(res)}, + _l @ PyString=> {return Err(vm.new_type_error(format!( + "string argument without an encoding" + )));}, + _j @ PyList => load_byte(vm.extract_elements(&ival), vm).or_else(|x| {return Err(x) }), + _k @ PyTuple => load_byte(vm.extract_elements(&ival), vm).or_else(|x| {return Err(x) }), + _obj => {} + ) + } else { + Ok(vec![]) + }; + match value { + Ok(val) => PyBytes::new(val).into_ref_with_type(vm, cls.clone()), + Err(err) => Err(err), + } } } From 2eb8e7bf2b49ba578f0180588a5e8592d38c5fd0 Mon Sep 17 00:00:00 2001 From: jgirardet Date: Sun, 7 Apr 2019 18:55:35 +0200 Subject: [PATCH 04/19] new ok for pybytes --- tests/snippets/bytes.py | 18 ++++++++++++ vm/src/obj/objbytes.rs | 61 ++++++++++++++++++----------------------- 2 files changed, 45 insertions(+), 34 deletions(-) diff --git a/tests/snippets/bytes.py b/tests/snippets/bytes.py index 19ebef68ae..1a4b43dfb0 100644 --- a/tests/snippets/bytes.py +++ b/tests/snippets/bytes.py @@ -1,3 +1,21 @@ +from testutils import assertRaises + +# new +assert bytes([1,2,3]) +assert bytes((1,2,3)) +assert bytes(range(4)) +assert b'bla' +assert bytes(3) +assert bytes("bla", "utf8") +try: + bytes("bla") +except TypeError: + assert True +else: + assert False + +# + assert b'foobar'.__eq__(2) == NotImplemented assert b'foobar'.__ne__(2) == NotImplemented assert b'foobar'.__gt__(2) == NotImplemented diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index 1b06a96416..74795088f6 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -97,26 +97,6 @@ extend_class!(context, bytesiterator_type, { });*/ //} -fn load_byte( - elements: PyResult>, - vm: &VirtualMachine, -) -> Result, PyObjectRef> { - if let Ok(value) = elements { - let mut data_bytes = vec![]; - for elem in value.iter() { - let v = objint::to_int(vm, &elem, 10)?; - if let Some(i) = v.to_u8() { - data_bytes.push(i); - } else { - return Err(vm.new_value_error("byte must be in range(0, 256)".to_string())); - } - } - Ok(data_bytes) - } else { - Err(vm.new_value_error("byte must be in range(0, 256)".to_string())) - } -} - #[pyimpl(__inside_vm)] impl PyBytesRef { #[pymethod(name = "__new__")] @@ -126,14 +106,14 @@ impl PyBytesRef { enc_option: OptionalArg, vm: &VirtualMachine, ) -> PyResult { - // Create bytes data: - + // First handle bytes(string, encoding[, errors]) if let OptionalArg::Present(enc) = enc_option { if let OptionalArg::Present(eval) = val_option { if let Ok(input) = eval.downcast::() { - if let Ok(encoding) = enc.downcast::() { + if let Ok(encoding) = enc.clone().downcast::() { if encoding.value.to_lowercase() == "utf8".to_string() || encoding.value.to_lowercase() == "utf-8".to_string() + // TODO: different encoding { return PyBytes::new(input.value.as_bytes().to_vec()) .into_ref_with_type(vm, cls.clone()); @@ -143,23 +123,23 @@ impl PyBytesRef { ); } } else { - return Err(vm.new_type_error("encodin is not string".to_string())); + return Err(vm.new_type_error(format!( + "bytes() argument 2 must be str, not {}", + enc.class().name + ))); } } else { - return Err(vm.new_type_error(format!( - "bytes() argument 2 must be str, not {}", - enc.class().name - ))); + return Err(vm.new_type_error("encoding without a string argument".to_string())); } } else { return Err(vm.new_type_error("encoding without a string argument".to_string())); } + // On ly one argument } else { let value = if let OptionalArg::Present(ival) = val_option { - println!("{:?}", ival); match_class!(ival.clone(), - _i @ PyInt => { - let size = objint::get_value(&ival).to_usize().unwrap(); + i @ PyInt => { + let size = objint::get_value(&i.into_object()).to_usize().unwrap(); let mut res: Vec = Vec::with_capacity(size); for _ in 0..size { res.push(0) @@ -168,9 +148,22 @@ impl PyBytesRef { _l @ PyString=> {return Err(vm.new_type_error(format!( "string argument without an encoding" )));}, - _j @ PyList => load_byte(vm.extract_elements(&ival), vm).or_else(|x| {return Err(x) }), - _k @ PyTuple => load_byte(vm.extract_elements(&ival), vm).or_else(|x| {return Err(x) }), - _obj => {} + obj => { + let elements = vm.extract_elements(&obj).or_else(|_| {return Err(vm.new_type_error(format!( + "cannot convert {} object to bytes", obj.class().name)));}); + + let mut data_bytes = vec![]; + for elem in elements.unwrap(){ + let v = objint::to_int(vm, &elem, 10)?; + if let Some(i) = v.to_u8() { + data_bytes.push(i); + } else { + return Err(vm.new_value_error("bytes must be in range(0, 256)".to_string())); + } + + } + Ok(data_bytes) + } ) } else { Ok(vec![]) From f538a92007f375c07dd018f295fd34b101be4d4b Mon Sep 17 00:00:00 2001 From: jgirardet Date: Sun, 7 Apr 2019 20:34:27 +0200 Subject: [PATCH 05/19] pybytes.__new__ ok --- vm/src/obj/objbyteinner.rs | 91 +++++++++++++++++++++++++++++++++++++- vm/src/obj/objbytes.rs | 69 ++--------------------------- 2 files changed, 92 insertions(+), 68 deletions(-) diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index 0607621f1a..fd08e4b939 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -1,9 +1,96 @@ +use crate::obj::objtype::PyClassRef; +use crate::pyobject::PyObjectRef; + +use crate::function::OptionalArg; + +use crate::vm::VirtualMachine; + +use crate::pyobject::{PyResult, TypeProtocol}; + +use crate::obj::objstr::PyString; + +use super::objint; +use crate::obj::objint::PyInt; +use num_traits::ToPrimitive; + #[derive(Debug, Default, Clone)] pub struct PyByteInner { pub elements: Vec, } + impl PyByteInner { - pub fn new(data: Vec) -> Self { - PyByteInner { elements: data } + pub fn new( + val_option: OptionalArg, + enc_option: OptionalArg, + vm: &VirtualMachine, + ) -> PyResult { + // First handle bytes(string, encoding[, errors]) + if let OptionalArg::Present(enc) = enc_option { + if let OptionalArg::Present(eval) = val_option { + if let Ok(input) = eval.downcast::() { + if let Ok(encoding) = enc.clone().downcast::() { + if encoding.value.to_lowercase() == "utf8".to_string() + || encoding.value.to_lowercase() == "utf-8".to_string() + // TODO: different encoding + { + return Ok(PyByteInner { + elements: input.value.as_bytes().to_vec(), + }); + } else { + return Err( + vm.new_value_error(format!("unknown encoding: {}", encoding.value)), //should be lookup error + ); + } + } else { + return Err(vm.new_type_error(format!( + "bytes() argument 2 must be str, not {}", + enc.class().name + ))); + } + } else { + return Err(vm.new_type_error("encoding without a string argument".to_string())); + } + } else { + return Err(vm.new_type_error("encoding without a string argument".to_string())); + } + // On ly one argument + } else { + let value = if let OptionalArg::Present(ival) = val_option { + match_class!(ival.clone(), + i @ PyInt => { + let size = objint::get_value(&i.into_object()).to_usize().unwrap(); + let mut res: Vec = Vec::with_capacity(size); + for _ in 0..size { + res.push(0) + } + Ok(res)}, + _l @ PyString=> {return Err(vm.new_type_error(format!( + "string argument without an encoding" + )));}, + obj => { + let elements = vm.extract_elements(&obj).or_else(|_| {return Err(vm.new_type_error(format!( + "cannot convert {} object to bytes", obj.class().name)));}); + + let mut data_bytes = vec![]; + for elem in elements.unwrap(){ + let v = objint::to_int(vm, &elem, 10)?; + if let Some(i) = v.to_u8() { + data_bytes.push(i); + } else { + return Err(vm.new_value_error("bytes must be in range(0, 256)".to_string())); + } + + } + Ok(data_bytes) + } + ) + } else { + Ok(vec![]) + }; + match value { + Ok(val) => Ok(PyByteInner { elements: val }), + Err(err) => Err(err), + } + } } } diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index 74795088f6..07e71e23cf 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -106,73 +106,10 @@ impl PyBytesRef { enc_option: OptionalArg, vm: &VirtualMachine, ) -> PyResult { - // First handle bytes(string, encoding[, errors]) - if let OptionalArg::Present(enc) = enc_option { - if let OptionalArg::Present(eval) = val_option { - if let Ok(input) = eval.downcast::() { - if let Ok(encoding) = enc.clone().downcast::() { - if encoding.value.to_lowercase() == "utf8".to_string() - || encoding.value.to_lowercase() == "utf-8".to_string() - // TODO: different encoding - { - return PyBytes::new(input.value.as_bytes().to_vec()) - .into_ref_with_type(vm, cls.clone()); - } else { - return Err( - vm.new_value_error(format!("unknown encoding: {}", encoding.value)), //should be lookup error - ); - } - } else { - return Err(vm.new_type_error(format!( - "bytes() argument 2 must be str, not {}", - enc.class().name - ))); - } - } else { - return Err(vm.new_type_error("encoding without a string argument".to_string())); - } - } else { - return Err(vm.new_type_error("encoding without a string argument".to_string())); - } - // On ly one argument - } else { - let value = if let OptionalArg::Present(ival) = val_option { - match_class!(ival.clone(), - i @ PyInt => { - let size = objint::get_value(&i.into_object()).to_usize().unwrap(); - let mut res: Vec = Vec::with_capacity(size); - for _ in 0..size { - res.push(0) - } - Ok(res)}, - _l @ PyString=> {return Err(vm.new_type_error(format!( - "string argument without an encoding" - )));}, - obj => { - let elements = vm.extract_elements(&obj).or_else(|_| {return Err(vm.new_type_error(format!( - "cannot convert {} object to bytes", obj.class().name)));}); - - let mut data_bytes = vec![]; - for elem in elements.unwrap(){ - let v = objint::to_int(vm, &elem, 10)?; - if let Some(i) = v.to_u8() { - data_bytes.push(i); - } else { - return Err(vm.new_value_error("bytes must be in range(0, 256)".to_string())); - } - - } - Ok(data_bytes) - } - ) - } else { - Ok(vec![]) - }; - match value { - Ok(val) => PyBytes::new(val).into_ref_with_type(vm, cls.clone()), - Err(err) => Err(err), - } + PyBytes { + inner: PyByteInner::new(val_option, enc_option, vm)?, } + .into_ref_with_type(vm, cls) } #[pymethod(name = "__repr__")] From ec98b4d6bf509abffe66d47ef43ef60f3bdc7248 Mon Sep 17 00:00:00 2001 From: jgirardet Date: Sun, 7 Apr 2019 22:36:13 +0200 Subject: [PATCH 06/19] repr len eq --- tests/snippets/bytes.py | 23 ++++++++++++++++++++++- vm/src/obj/objbyteinner.rs | 30 +++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/tests/snippets/bytes.py b/tests/snippets/bytes.py index 1a4b43dfb0..85469b756e 100644 --- a/tests/snippets/bytes.py +++ b/tests/snippets/bytes.py @@ -14,7 +14,28 @@ else: assert False -# +a = b"abcd" +b = b"ab" +c = b"abcd" + +# +# repr +assert repr(bytes([0, 1, 2])) == repr(b'\x00\x01\x02') +assert ( +repr(bytes([0, 1, 9, 10, 11, 13, 31, 32, 33, 89, 120, 255]) +== "b'\\x00\\x01\\t\\n\\x0b\\r\\x1f !Yx\\xff'") +) +assert repr(b"abcd") == "b'abcd'" + +#len +assert len(bytes("abcdé", "utf8")) == 6 + +# +assert a == b"abcd" +# assert a > b +# assert a >= b +# assert b < a +# assert b <= a assert b'foobar'.__eq__(2) == NotImplemented assert b'foobar'.__ne__(2) == NotImplemented diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index fd08e4b939..8d32aeabf9 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -1,4 +1,3 @@ -use crate::obj::objtype::PyClassRef; use crate::pyobject::PyObjectRef; use crate::function::OptionalArg; @@ -93,4 +92,33 @@ impl PyByteInner { } } } + + pub fn repr(&self) -> PyResult { + let mut res = String::with_capacity(self.elements.len()); + for i in self.elements.iter() { + match i { + 0..=8 => res.push_str(&format!("\\x0{}", i)), + 9 => res.push_str("\\t"), + 10 => res.push_str("\\n"), + 13 => res.push_str("\\r"), + 32..=126 => res.push(*(i) as char), + _ => res.push_str(&format!("\\x{:x}", i)), + } + } + Ok(res) + } + + pub fn len(&self) -> usize { + self.elements.len() + } + + pub fn eq(&self, other: &PyByteInner, vm: &VirtualMachine) -> PyResult { + if self.elements == other.elements { + Ok(vm.new_bool(true)) + } else { + Ok(vm.new_bool(false)) + } + } } +// TODO +// fix b"é" not allowed should be bytes("é", "utf8") From dddf9fee3906348063b9298b4621ae32adfd0e72 Mon Sep 17 00:00:00 2001 From: jgirardet Date: Sun, 7 Apr 2019 22:42:47 +0200 Subject: [PATCH 07/19] gt lt ge le --- tests/snippets/bytes.py | 8 +- vm/src/obj/objbyteinner.rs | 32 +++++++ vm/src/obj/objbytes.rs | 181 +++++++++++++++---------------------- 3 files changed, 110 insertions(+), 111 deletions(-) diff --git a/tests/snippets/bytes.py b/tests/snippets/bytes.py index 85469b756e..a0371b7898 100644 --- a/tests/snippets/bytes.py +++ b/tests/snippets/bytes.py @@ -32,10 +32,10 @@ # assert a == b"abcd" -# assert a > b -# assert a >= b -# assert b < a -# assert b <= a +assert a > b +assert a >= b +assert b < a +assert b <= a assert b'foobar'.__eq__(2) == NotImplemented assert b'foobar'.__ne__(2) == NotImplemented diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index 8d32aeabf9..81c70d4407 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -119,6 +119,38 @@ impl PyByteInner { Ok(vm.new_bool(false)) } } + + pub fn ge(&self, other: &PyByteInner, vm: &VirtualMachine) -> PyResult { + if self.elements >= other.elements { + Ok(vm.new_bool(true)) + } else { + Ok(vm.new_bool(false)) + } + } + + pub fn le(&self, other: &PyByteInner, vm: &VirtualMachine) -> PyResult { + if self.elements <= other.elements { + Ok(vm.new_bool(true)) + } else { + Ok(vm.new_bool(false)) + } + } + + pub fn gt(&self, other: &PyByteInner, vm: &VirtualMachine) -> PyResult { + if self.elements > other.elements { + Ok(vm.new_bool(true)) + } else { + Ok(vm.new_bool(false)) + } + } + + pub fn lt(&self, other: &PyByteInner, vm: &VirtualMachine) -> PyResult { + if self.elements < other.elements { + Ok(vm.new_bool(true)) + } else { + Ok(vm.new_bool(false)) + } + } } // TODO // fix b"é" not allowed should be bytes("é", "utf8") diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index 07e71e23cf..54427fc57b 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -1,30 +1,11 @@ -use crate::obj::objint::PyInt; -use crate::obj::objlist; -use crate::obj::objlist::PyList; -use crate::obj::objstr::PyString; -use crate::obj::objtuple::PyTuple; -use crate::obj::objtype; -use std::cell::Cell; -use std::collections::hash_map::DefaultHasher; -use std::hash::{Hash, Hasher}; +use crate::vm::VirtualMachine; use std::ops::Deref; -use num_traits::ToPrimitive; - use crate::function::OptionalArg; -use crate::pyobject::{ - IntoPyObject, PyClassImpl, PyContext, PyIterable, PyObjectRef, PyRef, PyResult, PyValue, - TryFromObject, TypeProtocol, -}; -use crate::vm::VirtualMachine; +use crate::pyobject::{PyClassImpl, PyContext, PyObjectRef, PyRef, PyResult, PyValue}; use super::objbyteinner::PyByteInner; -use super::objint; -use super::objiter; -use super::objstr; use super::objtype::PyClassRef; -use std::clone::Clone; - /// "bytes(iterable_of_ints) -> bytes\n\ /// bytes(string, encoding[, errors]) -> bytes\n\ /// bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\ @@ -35,7 +16,7 @@ use std::clone::Clone; /// - any object implementing the buffer API.\n \ /// - an integer"; #[pyclass(name = "bytes", __inside_vm)] -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct PyBytes { inner: PyByteInner, } @@ -113,102 +94,88 @@ impl PyBytesRef { } #[pymethod(name = "__repr__")] - fn repr(self, _vm: &VirtualMachine) -> String { - // TODO: don't just unwrap - let data = self.inner.elements.clone(); - format!("b'{:?}'", data) + fn repr(self, vm: &VirtualMachine) -> PyResult { + Ok(vm.new_str(format!("b'{}'", self.inner.repr()?))) } #[pymethod(name = "__len__")] fn len(self, _vm: &VirtualMachine) -> usize { - self.inner.elements.len() - } -} -/* - fn eq(self, other: PyObjectRef, vm: &VirtualMachine) -> PyObjectRef { - if let Ok(other) = other.downcast::() { - vm.ctx.new_bool(self.value == other.value) - } else { - vm.ctx.not_implemented() - } - } - - fn ge(self, other: PyObjectRef, vm: &VirtualMachine) -> PyObjectRef { - if let Ok(other) = other.downcast::() { - vm.ctx.new_bool(self.value >= other.value) - } else { - vm.ctx.not_implemented() - } - } - - fn gt(self, other: PyObjectRef, vm: &VirtualMachine) -> PyObjectRef { - if let Ok(other) = other.downcast::() { - vm.ctx.new_bool(self.value > other.value) - } else { - vm.ctx.not_implemented() - } + self.inner.len() } - fn le(self, other: PyObjectRef, vm: &VirtualMachine) -> PyObjectRef { - if let Ok(other) = other.downcast::() { - vm.ctx.new_bool(self.value <= other.value) - } else { - vm.ctx.not_implemented() - } + #[pymethod(name = "__eq__")] + fn eq(self, other: PyObjectRef, vm: &VirtualMachine) -> PyResult { + match_class!(other, + bytes @ PyBytes => self.inner.eq(&bytes.inner, vm), + _ => Ok(vm.ctx.not_implemented())) } - fn lt(self, other: PyObjectRef, vm: &VirtualMachine) -> PyObjectRef { - if let Ok(other) = other.downcast::() { - vm.ctx.new_bool(self.value < other.value) - } else { - vm.ctx.not_implemented() - } + #[pymethod(name = "__ge__")] + fn ge(self, other: PyObjectRef, vm: &VirtualMachine) -> PyResult { + match_class!(other, + bytes @ PyBytes => self.inner.ge(&bytes.inner, vm), + _ => Ok(vm.ctx.not_implemented())) } - - - fn hash(self, _vm: &VirtualMachine) -> u64 { - let mut hasher = DefaultHasher::new(); - self.value.hash(&mut hasher); - hasher.finish() + #[pymethod(name = "__le__")] + fn le(self, other: PyObjectRef, vm: &VirtualMachine) -> PyResult { + match_class!(other, + bytes @ PyBytes => self.inner.le(&bytes.inner, vm), + _ => Ok(vm.ctx.not_implemented())) } - - - fn iter(self, _vm: &VirtualMachine) -> PyBytesIterator { - PyBytesIterator { - position: Cell::new(0), - bytes: self, - } + #[pymethod(name = "__gt__")] + fn gt(self, other: PyObjectRef, vm: &VirtualMachine) -> PyResult { + match_class!(other, + bytes @ PyBytes => self.inner.gt(&bytes.inner, vm), + _ => Ok(vm.ctx.not_implemented())) } -} - - - -#[derive(Debug)] -pub struct PyBytesIterator { - position: Cell, - bytes: PyBytesRef, -} - -impl PyValue for PyBytesIterator { - fn class(vm: &VirtualMachine) -> PyClassRef { - vm.ctx.bytesiterator_type() + #[pymethod(name = "__lt__")] + fn lt(self, other: PyObjectRef, vm: &VirtualMachine) -> PyResult { + match_class!(other, + bytes @ PyBytes => self.inner.lt(&bytes.inner, vm), + _ => Ok(vm.ctx.not_implemented())) } } -type PyBytesIteratorRef = PyRef; - -impl PyBytesIteratorRef { - fn next(self, vm: &VirtualMachine) -> PyResult { - if self.position.get() < self.bytes.value.len() { - let ret = self.bytes[self.position.get()]; - self.position.set(self.position.get() + 1); - Ok(ret) - } else { - Err(objiter::new_stop_iteration(vm)) - } - } - - fn iter(self, _vm: &VirtualMachine) -> Self { - self - } -}*/ +// fn hash(self, _vm: &VirtualMachine) -> u64 { +// let mut hasher = DefaultHasher::new(); +// self.value.hash(&mut hasher); +// hasher.finish() +// } + +// fn iter(self, _vm: &VirtualMachine) -> PyBytesIterator { +// PyBytesIterator { +// position: Cell::new(0), +// bytes: self, +// } +// } +// } + +// #[derive(Debug)] +// pub struct PyBytesIterator { +// position: Cell, +// bytes: PyBytesRef, +// } + +// impl PyValue for PyBytesIterator { +// fn class(vm: &VirtualMachine) -> PyClassRef { +// vm.ctx.bytesiterator_type() +// } +// } + +// type PyBytesIteratorRef = PyRef; + +// impl PyBytesIteratorRef { +// fn next(self, vm: &VirtualMachine) -> PyResult { +// if self.position.get() < self.bytes.value.len() { +// let ret = self.bytes[self.position.get()]; +// self.position.set(self.position.get() + 1); +// Ok(ret) +// } else { +// Err(objiter::new_stop_iteration(vm)) +// } +// } + +// fn iter(self, _vm: &VirtualMachine) -> Self { +// self +// } +// } From 3c736c1f940f65e95275512e87d3ea28f46238e7 Mon Sep 17 00:00:00 2001 From: jgirardet Date: Mon, 8 Apr 2019 00:11:20 +0200 Subject: [PATCH 08/19] iter --- tests/snippets/bytes.py | 8 ++- vm/src/obj/objbyteinner.rs | 9 +++ vm/src/obj/objbytes.rs | 122 ++++++++++++++++--------------------- 3 files changed, 67 insertions(+), 72 deletions(-) diff --git a/tests/snippets/bytes.py b/tests/snippets/bytes.py index a0371b7898..bb72847b65 100644 --- a/tests/snippets/bytes.py +++ b/tests/snippets/bytes.py @@ -30,7 +30,7 @@ #len assert len(bytes("abcdé", "utf8")) == 6 -# +#comp assert a == b"abcd" assert a > b assert a >= b @@ -43,3 +43,9 @@ assert b'foobar'.__ge__(2) == NotImplemented assert b'foobar'.__lt__(2) == NotImplemented assert b'foobar'.__le__(2) == NotImplemented + +#hash +hash(a) == hash(b"abcd") + +#iter +[i for i in b"abcd"] == ["a", "b", "c", "d"] diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index 81c70d4407..93293e6271 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -7,6 +7,8 @@ use crate::vm::VirtualMachine; use crate::pyobject::{PyResult, TypeProtocol}; use crate::obj::objstr::PyString; +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; use super::objint; use crate::obj::objint::PyInt; @@ -151,6 +153,13 @@ impl PyByteInner { Ok(vm.new_bool(false)) } } + + pub fn hash(&self) -> usize { + let mut hasher = DefaultHasher::new(); + self.elements.hash(&mut hasher); + hasher.finish() as usize + } } + // TODO // fix b"é" not allowed should be bytes("é", "utf8") diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index 54427fc57b..8c64944ca1 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -1,11 +1,14 @@ use crate::vm::VirtualMachine; +use core::cell::Cell; use std::ops::Deref; use crate::function::OptionalArg; use crate::pyobject::{PyClassImpl, PyContext, PyObjectRef, PyRef, PyResult, PyValue}; use super::objbyteinner::PyByteInner; +use super::objiter; use super::objtype::PyClassRef; + /// "bytes(iterable_of_ints) -> bytes\n\ /// bytes(string, encoding[, errors]) -> bytes\n\ /// bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\ @@ -44,39 +47,18 @@ impl PyValue for PyBytes { } } -// Binary data support - -// Fill bytes class methods: - pub fn get_value<'a>(obj: &'a PyObjectRef) -> impl Deref> + 'a { &obj.payload::().unwrap().inner.elements } -// pub fn init(context: &PyContext) { -// let bytes_doc = -pub fn init(ctx: &PyContext) { - PyBytesRef::extend_class(ctx, &ctx.bytes_type); +pub fn init(context: &PyContext) { + PyBytesRef::extend_class(context, &context.bytes_type); + let bytesiterator_type = &context.bytesiterator_type; + extend_class!(context, bytesiterator_type, { + "__next__" => context.new_rustfunc(PyBytesIteratorRef::next), + "__iter__" => context.new_rustfunc(PyBytesIteratorRef::iter), + }); } -//extend_class!(context, &context.bytes_type, { -//"__new__" => context.new_rustfunc(bytes_new), -/* "__eq__" => context.new_rustfunc(PyBytesRef::eq), -"__lt__" => context.new_rustfunc(PyBytesRef::lt), -"__le__" => context.new_rustfunc(PyBytesRef::le), -"__gt__" => context.new_rustfunc(PyBytesRef::gt), -"__ge__" => context.new_rustfunc(PyBytesRef::ge), -"__hash__" => context.new_rustfunc(PyBytesRef::hash), -"__repr__" => context.new_rustfunc(PyBytesRef::repr), -"__len__" => context.new_rustfunc(PyBytesRef::len), -"__iter__" => context.new_rustfunc(PyBytesRef::iter), -"__doc__" => context.new_str(bytes_doc.to_string())*/ -// }); - -/* let bytesiterator_type = &context.bytesiterator_type; -extend_class!(context, bytesiterator_type, { - "__next__" => context.new_rustfunc(PyBytesIteratorRef::next), - "__iter__" => context.new_rustfunc(PyBytesIteratorRef::iter), -});*/ -//} #[pyimpl(__inside_vm)] impl PyBytesRef { @@ -134,48 +116,46 @@ impl PyBytesRef { bytes @ PyBytes => self.inner.lt(&bytes.inner, vm), _ => Ok(vm.ctx.not_implemented())) } + #[pymethod(name = "__hash__")] + fn hash(self, _vm: &VirtualMachine) -> usize { + self.inner.hash() + } + + #[pymethod(name = "__iter__")] + fn iter(self, _vm: &VirtualMachine) -> PyBytesIterator { + PyBytesIterator { + position: Cell::new(0), + bytes: self, + } + } } -// fn hash(self, _vm: &VirtualMachine) -> u64 { -// let mut hasher = DefaultHasher::new(); -// self.value.hash(&mut hasher); -// hasher.finish() -// } - -// fn iter(self, _vm: &VirtualMachine) -> PyBytesIterator { -// PyBytesIterator { -// position: Cell::new(0), -// bytes: self, -// } -// } -// } - -// #[derive(Debug)] -// pub struct PyBytesIterator { -// position: Cell, -// bytes: PyBytesRef, -// } - -// impl PyValue for PyBytesIterator { -// fn class(vm: &VirtualMachine) -> PyClassRef { -// vm.ctx.bytesiterator_type() -// } -// } - -// type PyBytesIteratorRef = PyRef; - -// impl PyBytesIteratorRef { -// fn next(self, vm: &VirtualMachine) -> PyResult { -// if self.position.get() < self.bytes.value.len() { -// let ret = self.bytes[self.position.get()]; -// self.position.set(self.position.get() + 1); -// Ok(ret) -// } else { -// Err(objiter::new_stop_iteration(vm)) -// } -// } - -// fn iter(self, _vm: &VirtualMachine) -> Self { -// self -// } -// } +#[derive(Debug)] +pub struct PyBytesIterator { + position: Cell, + bytes: PyBytesRef, +} + +impl PyValue for PyBytesIterator { + fn class(vm: &VirtualMachine) -> PyClassRef { + vm.ctx.bytesiterator_type() + } +} + +type PyBytesIteratorRef = PyRef; + +impl PyBytesIteratorRef { + fn next(self, vm: &VirtualMachine) -> PyResult { + if self.position.get() < self.bytes.inner.len() { + let ret = self.bytes[self.position.get()]; + self.position.set(self.position.get() + 1); + Ok(ret) + } else { + Err(objiter::new_stop_iteration(vm)) + } + } + + fn iter(self, _vm: &VirtualMachine) -> Self { + self + } +} From ec65b8480fe35ece7182544eaf75d0582a257266 Mon Sep 17 00:00:00 2001 From: jgirardet Date: Mon, 8 Apr 2019 00:45:53 +0200 Subject: [PATCH 09/19] add contains --- tests/snippets/bytes.py | 14 ++++++++++++++ vm/src/obj/objbyteinner.rs | 26 ++++++++++++++++++++++++++ vm/src/obj/objbytes.rs | 27 +++++++++++++++++++++++++++ 3 files changed, 67 insertions(+) diff --git a/tests/snippets/bytes.py b/tests/snippets/bytes.py index bb72847b65..2fd32153d3 100644 --- a/tests/snippets/bytes.py +++ b/tests/snippets/bytes.py @@ -49,3 +49,17 @@ #iter [i for i in b"abcd"] == ["a", "b", "c", "d"] + +#add +assert a + b == b"abcdab" + +#contains +# contains +assert b"ab" in b"abcd" +assert b"cd" in b"abcd" +assert b"abcd" in b"abcd" +assert b"a" in b"abcd" +assert b"d" in b"abcd" +assert b"dc" not in b"abcd" +# assert 97 in b"abcd" +# assert 150 not in b"abcd" \ No newline at end of file diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index 93293e6271..03876095cd 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -159,6 +159,32 @@ impl PyByteInner { self.elements.hash(&mut hasher); hasher.finish() as usize } + + pub fn add(&self, other: &PyByteInner, vm: &VirtualMachine) -> PyResult { + let elements: Vec = self + .elements + .iter() + .chain(other.elements.iter()) + .cloned() + .collect(); + Ok(vm.ctx.new_bytes(elements)) + } + + pub fn contains_bytes(&self, other: &PyByteInner, vm: &VirtualMachine) -> PyResult { + for (n, i) in self.elements.iter().enumerate() { + if n + other.len() <= self.len() && *i == other.elements[0] { + if &self.elements[n..n + other.len()] == other.elements.as_slice() { + return Ok(vm.new_bool(true)); + } + } + } + Ok(vm.new_bool(false)) + } + + pub fn contains_int(&self, int: &PyInt, vm: &VirtualMachine) -> PyResult { + self.elements.contains(int); + Ok(vm.new_bool(false)) + } } // TODO diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index 8c64944ca1..e052654bf5 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -128,6 +128,33 @@ impl PyBytesRef { bytes: self, } } + + #[pymethod(name = "__add__")] + fn add(self, other: PyObjectRef, vm: &VirtualMachine) -> PyResult { + match_class!(other, + bytes @ PyBytes => self.inner.add(&bytes.inner, vm), + _ => Ok(vm.ctx.not_implemented())) + } + + #[pymethod(name = "__contains__")] + fn contains(self, needle: PyObjectRef, vm: &VirtualMachine) -> PyResult { + // no new style since objint is not. + match_class!(needle, + bytes @ PyBytes => self.inner.contains_bytes(&bytes.inner, vm), + int @ PyInt => self.inner.contains_int(&int, vm), + _ => Ok(vm.ctx.not_implemented())) + // if objtype::isinstance(&needle, &vm.ctx.bytes_type()) { + // let result = vec_contains(&self.value, &get_value(&needle)); + // vm.ctx.new_bool(result) + // } else if objtype::isinstance(&needle, &vm.ctx.int_type()) { + // let result = self + // .value + // .contains(&objint::get_value(&needle).to_u8().unwrap()); + // vm.ctx.new_bool(result) + // } else { + // vm.new_type_error(format!("Cannot add {:?} and {:?}", self, needle)) + // } + } } #[derive(Debug)] From 8232a4d28512323125f853e3d448cc190c2c3a6f Mon Sep 17 00:00:00 2001 From: jgirardet Date: Mon, 8 Apr 2019 21:27:44 +0200 Subject: [PATCH 10/19] finish contains --- tests/snippets/bytes.py | 45 ++++++++++++++++++++------------------ vm/src/obj/objbyteinner.rs | 14 ++++++++---- vm/src/obj/objbytes.rs | 12 +--------- 3 files changed, 35 insertions(+), 36 deletions(-) diff --git a/tests/snippets/bytes.py b/tests/snippets/bytes.py index 2fd32153d3..b03eb38085 100644 --- a/tests/snippets/bytes.py +++ b/tests/snippets/bytes.py @@ -1,10 +1,10 @@ from testutils import assertRaises # new -assert bytes([1,2,3]) -assert bytes((1,2,3)) +assert bytes([1, 2, 3]) +assert bytes((1, 2, 3)) assert bytes(range(4)) -assert b'bla' +assert b"bla" assert bytes(3) assert bytes("bla", "utf8") try: @@ -20,40 +20,39 @@ # # repr -assert repr(bytes([0, 1, 2])) == repr(b'\x00\x01\x02') -assert ( -repr(bytes([0, 1, 9, 10, 11, 13, 31, 32, 33, 89, 120, 255]) -== "b'\\x00\\x01\\t\\n\\x0b\\r\\x1f !Yx\\xff'") +assert repr(bytes([0, 1, 2])) == repr(b"\x00\x01\x02") +assert repr( + bytes([0, 1, 9, 10, 11, 13, 31, 32, 33, 89, 120, 255]) + == "b'\\x00\\x01\\t\\n\\x0b\\r\\x1f !Yx\\xff'" ) assert repr(b"abcd") == "b'abcd'" -#len +# len assert len(bytes("abcdé", "utf8")) == 6 -#comp +# comp assert a == b"abcd" assert a > b assert a >= b assert b < a assert b <= a -assert b'foobar'.__eq__(2) == NotImplemented -assert b'foobar'.__ne__(2) == NotImplemented -assert b'foobar'.__gt__(2) == NotImplemented -assert b'foobar'.__ge__(2) == NotImplemented -assert b'foobar'.__lt__(2) == NotImplemented -assert b'foobar'.__le__(2) == NotImplemented +assert b"foobar".__eq__(2) == NotImplemented +assert b"foobar".__ne__(2) == NotImplemented +assert b"foobar".__gt__(2) == NotImplemented +assert b"foobar".__ge__(2) == NotImplemented +assert b"foobar".__lt__(2) == NotImplemented +assert b"foobar".__le__(2) == NotImplemented -#hash +# hash hash(a) == hash(b"abcd") -#iter +# iter [i for i in b"abcd"] == ["a", "b", "c", "d"] -#add +# add assert a + b == b"abcdab" -#contains # contains assert b"ab" in b"abcd" assert b"cd" in b"abcd" @@ -61,5 +60,9 @@ assert b"a" in b"abcd" assert b"d" in b"abcd" assert b"dc" not in b"abcd" -# assert 97 in b"abcd" -# assert 150 not in b"abcd" \ No newline at end of file +assert 97 in b"abcd" +assert 150 not in b"abcd" +try: + 350 in b"abcd" +except ValueError: + pass diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index 03876095cd..95df844214 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -181,11 +181,17 @@ impl PyByteInner { Ok(vm.new_bool(false)) } - pub fn contains_int(&self, int: &PyInt, vm: &VirtualMachine) -> PyResult { - self.elements.contains(int); - Ok(vm.new_bool(false)) + pub fn contains_int(&self, int: &PyInt, vm: &VirtualMachine) -> PyResult { + if let Some(int) = int.as_bigint().to_u8() { + if self.elements.contains(&int) { + Ok(vm.new_bool(true)) + } else { + Ok(vm.new_bool(false)) + } + } else { + Err(vm.new_value_error("byte must be in range(0, 256)".to_string())) + } } } - // TODO // fix b"é" not allowed should be bytes("é", "utf8") diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index e052654bf5..f376dbc67a 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -1,3 +1,4 @@ +use crate::obj::objint::PyInt; use crate::vm::VirtualMachine; use core::cell::Cell; use std::ops::Deref; @@ -143,17 +144,6 @@ impl PyBytesRef { bytes @ PyBytes => self.inner.contains_bytes(&bytes.inner, vm), int @ PyInt => self.inner.contains_int(&int, vm), _ => Ok(vm.ctx.not_implemented())) - // if objtype::isinstance(&needle, &vm.ctx.bytes_type()) { - // let result = vec_contains(&self.value, &get_value(&needle)); - // vm.ctx.new_bool(result) - // } else if objtype::isinstance(&needle, &vm.ctx.int_type()) { - // let result = self - // .value - // .contains(&objint::get_value(&needle).to_u8().unwrap()); - // vm.ctx.new_bool(result) - // } else { - // vm.new_type_error(format!("Cannot add {:?} and {:?}", self, needle)) - // } } } From b53595831005023bdfeb6e5c93eb40b3286775f9 Mon Sep 17 00:00:00 2001 From: jgirardet Date: Mon, 8 Apr 2019 23:12:40 +0200 Subject: [PATCH 11/19] add getitem, fix contain error_message --- tests/snippets/bytes.py | 11 +++++++++++ vm/src/obj/objbyteinner.rs | 18 +++++++++++++++++- vm/src/obj/objbytes.rs | 12 ++++++++++-- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/tests/snippets/bytes.py b/tests/snippets/bytes.py index b03eb38085..770f4f373b 100644 --- a/tests/snippets/bytes.py +++ b/tests/snippets/bytes.py @@ -66,3 +66,14 @@ 350 in b"abcd" except ValueError: pass + + +# getitem +d = b"abcdefghij" + +assert d[1] == 98 +assert d[-1] == 106 +assert d[2:6] == b"cdef" +assert d[-6:] == b"efghij" +assert d[1:8:2] == b"bdfh" +assert d[8:1:-2] == b"igec" diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index 95df844214..bb9966ebfa 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -11,6 +11,7 @@ use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; use super::objint; +use super::objsequence::PySliceableSequence; use crate::obj::objint::PyInt; use num_traits::ToPrimitive; @@ -189,9 +190,24 @@ impl PyByteInner { Ok(vm.new_bool(false)) } } else { - Err(vm.new_value_error("byte must be in range(0, 256)".to_string())) + Err(vm.new_value_error("byte mu st be in range(0, 256)".to_string())) } } + + pub fn getitem_int(&self, int: &PyInt, vm: &VirtualMachine) -> PyResult { + if let Some(idx) = self.elements.get_pos(int.as_bigint().to_i32().unwrap()) { + Ok(vm.new_int(self.elements[idx])) + } else { + Err(vm.new_index_error("index out of range".to_string())) + } + } + + pub fn getitem_slice(&self, slice: &PyObjectRef, vm: &VirtualMachine) -> PyResult { + Ok(vm + .ctx + .new_bytes(self.elements.get_slice_items(vm, slice).unwrap())) + } } + // TODO // fix b"é" not allowed should be bytes("é", "utf8") diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index f376dbc67a..2c15e60cda 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -8,6 +8,7 @@ use crate::pyobject::{PyClassImpl, PyContext, PyObjectRef, PyRef, PyResult, PyVa use super::objbyteinner::PyByteInner; use super::objiter; +use super::objslice::PySlice; use super::objtype::PyClassRef; /// "bytes(iterable_of_ints) -> bytes\n\ @@ -139,11 +140,18 @@ impl PyBytesRef { #[pymethod(name = "__contains__")] fn contains(self, needle: PyObjectRef, vm: &VirtualMachine) -> PyResult { - // no new style since objint is not. match_class!(needle, bytes @ PyBytes => self.inner.contains_bytes(&bytes.inner, vm), int @ PyInt => self.inner.contains_int(&int, vm), - _ => Ok(vm.ctx.not_implemented())) + obj => Err(vm.new_type_error(format!("a bytes-like object is required, not {}", obj)))) + } + + #[pymethod(name = "__getitem__")] + fn getitem(self, needle: PyObjectRef, vm: &VirtualMachine) -> PyResult { + match_class!(needle, + int @ PyInt => self.inner.getitem_int(&int, vm), + slice @ PySlice => self.inner.getitem_slice(slice.as_object(), vm), + obj => Err(vm.new_type_error(format!("byte indices must be integers or slices, not {}", obj)))) } } From 2940c7cc280143de25777198e1bb3631b3f54104 Mon Sep 17 00:00:00 2001 From: Jimmy Girardet Date: Tue, 9 Apr 2019 14:33:29 +0200 Subject: [PATCH 12/19] add islpaha isalnum isdigit islower isupper isspace istitle --- tests/snippets/bytes.py | 35 +++++++++++++++ vm/src/obj/objbyteinner.rs | 90 ++++++++++++++++++++++++++++++++++++++ vm/src/obj/objbytes.rs | 33 ++++++++++++++ 3 files changed, 158 insertions(+) diff --git a/tests/snippets/bytes.py b/tests/snippets/bytes.py index 770f4f373b..ceb52921cc 100644 --- a/tests/snippets/bytes.py +++ b/tests/snippets/bytes.py @@ -77,3 +77,38 @@ assert d[-6:] == b"efghij" assert d[1:8:2] == b"bdfh" assert d[8:1:-2] == b"igec" + + +# is_xx methods + +assert bytes(b"1a23").isalnum() +assert not bytes(b"1%a23").isalnum() + +assert bytes(b"abc").isalpha() +assert not bytes(b"abc1").isalpha() + +# travis doesn't like this +# assert bytes(b'xyz').isascii() +# assert not bytes([128, 157, 32]).isascii() + +assert bytes(b"1234567890").isdigit() +assert not bytes(b"12ab").isdigit() + +l = bytes(b"lower") +b = bytes(b"UPPER") + +assert l.islower() +assert not l.isupper() +assert b.isupper() +assert not bytes(b"Super Friends").islower() + +assert bytes(b" \n\t").isspace() +assert not bytes(b"\td\n").isspace() + +assert b.isupper() +assert not b.islower() +assert l.islower() +assert not bytes(b"tuPpEr").isupper() + +assert bytes(b"Is Title Case").istitle() +assert not bytes(b"is Not title casE").istitle() diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index bb9966ebfa..14da2521eb 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -207,6 +207,96 @@ impl PyByteInner { .ctx .new_bytes(self.elements.get_slice_items(vm, slice).unwrap())) } + + pub fn isalnum(&self, vm: &VirtualMachine) -> PyResult { + Ok(vm.new_bool( + !self.elements.is_empty() + && self + .elements + .iter() + .all(|x| char::from(*x).is_alphanumeric()), + )) + } + + pub fn isalpha(&self, vm: &VirtualMachine) -> PyResult { + Ok(vm.new_bool( + !self.elements.is_empty() + && self.elements.iter().all(|x| char::from(*x).is_alphabetic()), + )) + } + + pub fn isascii(&self, vm: &VirtualMachine) -> PyResult { + Ok(vm.new_bool( + !self.elements.is_empty() && self.elements.iter().all(|x| char::from(*x).is_ascii()), + )) + } + + pub fn isdigit(&self, vm: &VirtualMachine) -> PyResult { + Ok(vm.new_bool( + !self.elements.is_empty() && self.elements.iter().all(|x| char::from(*x).is_digit(10)), + )) + } + + pub fn islower(&self, vm: &VirtualMachine) -> PyResult { + Ok(vm.new_bool( + !self.elements.is_empty() + && self + .elements + .iter() + .filter(|x| !char::from(**x).is_whitespace()) + .all(|x| char::from(*x).is_lowercase()), + )) + } + + pub fn isspace(&self, vm: &VirtualMachine) -> PyResult { + Ok(vm.new_bool( + !self.elements.is_empty() + && self.elements.iter().all(|x| char::from(*x).is_whitespace()), + )) + } + + pub fn isupper(&self, vm: &VirtualMachine) -> PyResult { + Ok(vm.new_bool( + !self.elements.is_empty() + && self + .elements + .iter() + .filter(|x| !char::from(**x).is_whitespace()) + .all(|x| char::from(*x).is_uppercase()), + )) + } + + pub fn istitle(&self, vm: &VirtualMachine) -> PyResult { + if self.elements.is_empty() { + return Ok(vm.new_bool(false)); + } + + let mut iter = self.elements.iter().peekable(); + let mut prev_cased = false; + + while let Some(c) = iter.next() { + let current = char::from(*c); + let next = if let Some(k) = iter.peek() { + char::from(**k) + } else if current.is_uppercase() { + return Ok(vm.new_bool(!prev_cased)); + } else { + return Ok(vm.new_bool(prev_cased)); + }; + + let is_cased = current.to_uppercase().next().unwrap() != current + || current.to_lowercase().next().unwrap() != current; + if (is_cased && next.is_uppercase() && !prev_cased) + || (!is_cased && next.is_lowercase()) + { + return Ok(vm.new_bool(false)); + } + + prev_cased = is_cased; + } + + Ok(vm.new_bool(true)) + } } // TODO diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index 2c15e60cda..83ce3b8fbd 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -153,6 +153,39 @@ impl PyBytesRef { slice @ PySlice => self.inner.getitem_slice(slice.as_object(), vm), obj => Err(vm.new_type_error(format!("byte indices must be integers or slices, not {}", obj)))) } + + #[pymethod(name = "isalnum")] + fn isalnum(self, vm: &VirtualMachine) -> PyResult { + self.inner.isalnum(vm) + } + #[pymethod(name = "isalpha")] + fn isalpha(self, vm: &VirtualMachine) -> PyResult { + self.inner.isalpha(vm) + } + #[pymethod(name = "isascii")] + fn isascii(self, vm: &VirtualMachine) -> PyResult { + self.inner.isascii(vm) + } + #[pymethod(name = "isdigit")] + fn isdigit(self, vm: &VirtualMachine) -> PyResult { + self.inner.isdigit(vm) + } + #[pymethod(name = "islower")] + fn islower(self, vm: &VirtualMachine) -> PyResult { + self.inner.islower(vm) + } + #[pymethod(name = "isspace")] + fn isspace(self, vm: &VirtualMachine) -> PyResult { + self.inner.isspace(vm) + } + #[pymethod(name = "isupper")] + fn isupper(self, vm: &VirtualMachine) -> PyResult { + self.inner.isupper(vm) + } + #[pymethod(name = "istitle")] + fn istitle(self, vm: &VirtualMachine) -> PyResult { + self.inner.istitle(vm) + } } #[derive(Debug)] From 6c745f68dd6d123fc491cc513429ede529fa3a2e Mon Sep 17 00:00:00 2001 From: Jimmy Girardet Date: Tue, 9 Apr 2019 14:43:13 +0200 Subject: [PATCH 13/19] fix typo, fix bytesinner.add now return Vec[u8] --- vm/src/obj/objbyteinner.rs | 6 +++--- vm/src/obj/objbytes.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index 14da2521eb..bcb1f82075 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -55,7 +55,7 @@ impl PyByteInner { } else { return Err(vm.new_type_error("encoding without a string argument".to_string())); } - // On ly one argument + // Only one argument } else { let value = if let OptionalArg::Present(ival) = val_option { match_class!(ival.clone(), @@ -161,14 +161,14 @@ impl PyByteInner { hasher.finish() as usize } - pub fn add(&self, other: &PyByteInner, vm: &VirtualMachine) -> PyResult { + pub fn add(&self, other: &PyByteInner, _vm: &VirtualMachine) -> Vec { let elements: Vec = self .elements .iter() .chain(other.elements.iter()) .cloned() .collect(); - Ok(vm.ctx.new_bytes(elements)) + elements } pub fn contains_bytes(&self, other: &PyByteInner, vm: &VirtualMachine) -> PyResult { diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index 83ce3b8fbd..0b257cb773 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -134,7 +134,7 @@ impl PyBytesRef { #[pymethod(name = "__add__")] fn add(self, other: PyObjectRef, vm: &VirtualMachine) -> PyResult { match_class!(other, - bytes @ PyBytes => self.inner.add(&bytes.inner, vm), + bytes @ PyBytes => Ok(vm.ctx.new_bytes(self.inner.add(&bytes.inner, vm))), _ => Ok(vm.ctx.not_implemented())) } From 9b763072fe78d8b2dd9fcdeb31a3d1fafcbcc367 Mon Sep 17 00:00:00 2001 From: Jimmy Girardet Date: Tue, 9 Apr 2019 16:23:56 +0200 Subject: [PATCH 14/19] add upper lower --- tests/snippets/bytes.py | 6 ++++++ vm/src/obj/objbyteinner.rs | 8 ++++++++ vm/src/obj/objbytes.rs | 17 +++++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/tests/snippets/bytes.py b/tests/snippets/bytes.py index ceb52921cc..3ccb2fc234 100644 --- a/tests/snippets/bytes.py +++ b/tests/snippets/bytes.py @@ -112,3 +112,9 @@ assert bytes(b"Is Title Case").istitle() assert not bytes(b"is Not title casE").istitle() + +# upper lower +l = bytes(b"lower") +b = bytes(b"UPPER") +assert l.lower().islower() +assert b.upper().isupper() diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index bcb1f82075..4771a91bd5 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -297,6 +297,14 @@ impl PyByteInner { Ok(vm.new_bool(true)) } + + pub fn lower(&self, _vm: &VirtualMachine) -> Vec { + self.elements.to_ascii_lowercase() + } + + pub fn upper(&self, _vm: &VirtualMachine) -> Vec { + self.elements.to_ascii_uppercase() + } } // TODO diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index 0b257cb773..0be8215005 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -158,34 +158,51 @@ impl PyBytesRef { fn isalnum(self, vm: &VirtualMachine) -> PyResult { self.inner.isalnum(vm) } + #[pymethod(name = "isalpha")] fn isalpha(self, vm: &VirtualMachine) -> PyResult { self.inner.isalpha(vm) } + #[pymethod(name = "isascii")] fn isascii(self, vm: &VirtualMachine) -> PyResult { self.inner.isascii(vm) } + #[pymethod(name = "isdigit")] fn isdigit(self, vm: &VirtualMachine) -> PyResult { self.inner.isdigit(vm) } + #[pymethod(name = "islower")] fn islower(self, vm: &VirtualMachine) -> PyResult { self.inner.islower(vm) } + #[pymethod(name = "isspace")] fn isspace(self, vm: &VirtualMachine) -> PyResult { self.inner.isspace(vm) } + #[pymethod(name = "isupper")] fn isupper(self, vm: &VirtualMachine) -> PyResult { self.inner.isupper(vm) } + #[pymethod(name = "istitle")] fn istitle(self, vm: &VirtualMachine) -> PyResult { self.inner.istitle(vm) } + + #[pymethod(name = "lower")] + fn lower(self, vm: &VirtualMachine) -> PyResult { + Ok(vm.ctx.new_bytes(self.inner.lower(vm))) + } + + #[pymethod(name = "upper")] + fn upper(self, vm: &VirtualMachine) -> PyResult { + Ok(vm.ctx.new_bytes(self.inner.upper(vm))) + } } #[derive(Debug)] From 5cc83a35aa6d9430a8f0d7c793d6cbbad17dfd66 Mon Sep 17 00:00:00 2001 From: Jimmy Girardet Date: Tue, 9 Apr 2019 17:08:38 +0200 Subject: [PATCH 15/19] hex --- tests/snippets/bytes.py | 3 ++- vm/src/obj/objbyteinner.rs | 9 +++++++++ vm/src/obj/objbytes.rs | 5 +++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/snippets/bytes.py b/tests/snippets/bytes.py index 3ccb2fc234..bf7906bc1e 100644 --- a/tests/snippets/bytes.py +++ b/tests/snippets/bytes.py @@ -113,8 +113,9 @@ assert bytes(b"Is Title Case").istitle() assert not bytes(b"is Not title casE").istitle() -# upper lower +# upper lower hex l = bytes(b"lower") b = bytes(b"UPPER") assert l.lower().islower() assert b.upper().isupper() +assert bytes([0, 1, 9, 23, 90, 234]).hex() == "000109175aea" diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index 4771a91bd5..6994a58ac2 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -305,6 +305,15 @@ impl PyByteInner { pub fn upper(&self, _vm: &VirtualMachine) -> Vec { self.elements.to_ascii_uppercase() } + + pub fn hex(&self, vm: &VirtualMachine) -> PyResult { + let bla = self + .elements + .iter() + .map(|x| format!("{:02x}", x)) + .collect::(); + Ok(vm.ctx.new_str(bla)) + } } // TODO diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index 0be8215005..5859b10b4c 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -203,6 +203,11 @@ impl PyBytesRef { fn upper(self, vm: &VirtualMachine) -> PyResult { Ok(vm.ctx.new_bytes(self.inner.upper(vm))) } + + #[pymethod(name = "hex")] + fn hex(self, vm: &VirtualMachine) -> PyResult { + self.inner.hex(vm) + } } #[derive(Debug)] From 977f56ade1bc9f9aca692ff14dcb352b42508e10 Mon Sep 17 00:00:00 2001 From: Jimmy Girardet Date: Wed, 10 Apr 2019 10:08:10 +0200 Subject: [PATCH 16/19] add bytes.fromhex --- tests/snippets/bytes.py | 14 +++++++++++++- vm/src/obj/objbyteinner.rs | 33 +++++++++++++++++++++++++++++++++ vm/src/obj/objbytes.rs | 22 +++++++++++++++++++--- 3 files changed, 65 insertions(+), 4 deletions(-) diff --git a/tests/snippets/bytes.py b/tests/snippets/bytes.py index bf7906bc1e..acd8d9e506 100644 --- a/tests/snippets/bytes.py +++ b/tests/snippets/bytes.py @@ -113,9 +113,21 @@ assert bytes(b"Is Title Case").istitle() assert not bytes(b"is Not title casE").istitle() -# upper lower hex +# upper lower l = bytes(b"lower") b = bytes(b"UPPER") assert l.lower().islower() assert b.upper().isupper() + +# hex from hex assert bytes([0, 1, 9, 23, 90, 234]).hex() == "000109175aea" + +bytes.fromhex("62 6c7a 34350a ") == b"blz45\n" +try: + bytes.fromhex("62 a 21") +except ValueError as e: + str(e) == "non-hexadecimal number found in fromhex() arg at position 4" +try: + bytes.fromhex("6Z2") +except ValueError as e: + str(e) == "non-hexadecimal number found in fromhex() arg at position 1" diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index 6994a58ac2..7504f2b749 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -314,6 +314,39 @@ impl PyByteInner { .collect::(); Ok(vm.ctx.new_str(bla)) } + + pub fn fromhex(string: String, vm: &VirtualMachine) -> Result, PyObjectRef> { + // first check for invalid character + for (i, c) in string.char_indices() { + if !c.is_digit(16) && !c.is_whitespace() { + return Err(vm.new_value_error(format!( + "non-hexadecimal number found in fromhex() arg at position {}", + i + ))); + } + } + + // strip white spaces + let stripped = string.split_whitespace().collect::(); + + // Hex is evaluated on 2 digits + if stripped.len() % 2 != 0 { + return Err(vm.new_value_error(format!( + "non-hexadecimal number found in fromhex() arg at position {}", + stripped.len() - 1 + ))); + } + + // parse even string + Ok(stripped + .chars() + .collect::>() + .chunks(2) + .map(|x| x.to_vec().iter().collect::()) + .map(|x| u8::from_str_radix(&x, 16)) + .map(|x| x.unwrap()) + .collect::>()) + } } // TODO diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index 5859b10b4c..2c8edca6ab 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -1,4 +1,5 @@ use crate::obj::objint::PyInt; +use crate::obj::objstr::PyString; use crate::vm::VirtualMachine; use core::cell::Cell; use std::ops::Deref; @@ -55,11 +56,15 @@ pub fn get_value<'a>(obj: &'a PyObjectRef) -> impl Deref> + 'a pub fn init(context: &PyContext) { PyBytesRef::extend_class(context, &context.bytes_type); + let bytes_type = &context.bytes_type; + extend_class!(context, bytes_type, { +"fromhex" => context.new_rustfunc(PyBytesRef::fromhex), +}); let bytesiterator_type = &context.bytesiterator_type; extend_class!(context, bytesiterator_type, { - "__next__" => context.new_rustfunc(PyBytesIteratorRef::next), - "__iter__" => context.new_rustfunc(PyBytesIteratorRef::iter), - }); +"__next__" => context.new_rustfunc(PyBytesIteratorRef::next), +"__iter__" => context.new_rustfunc(PyBytesIteratorRef::iter), +}); } #[pyimpl(__inside_vm)] @@ -208,6 +213,17 @@ impl PyBytesRef { fn hex(self, vm: &VirtualMachine) -> PyResult { self.inner.hex(vm) } + + // #[pymethod(name = "fromhex")] + fn fromhex(string: PyObjectRef, vm: &VirtualMachine) -> PyResult { + match_class!(string, + s @ PyString => { + match PyByteInner::fromhex(s.to_string(), vm) { + Ok(x) => Ok(vm.ctx.new_bytes(x)), + Err(y) => Err(y)}}, + obj => Err(vm.new_type_error(format!("fromhex() argument must be str, not {}", obj ))) + ) + } } #[derive(Debug)] From 751c3d52e2a4deccd0443b84c0c70329a2b76f02 Mon Sep 17 00:00:00 2001 From: jgirardet Date: Wed, 10 Apr 2019 13:24:31 +0200 Subject: [PATCH 17/19] use vec! for int arg in bytes new --- tests/snippets/bytes.py | 6 +++++- vm/src/obj/objbyteinner.rs | 6 +----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/snippets/bytes.py b/tests/snippets/bytes.py index acd8d9e506..0a3521df33 100644 --- a/tests/snippets/bytes.py +++ b/tests/snippets/bytes.py @@ -4,8 +4,8 @@ assert bytes([1, 2, 3]) assert bytes((1, 2, 3)) assert bytes(range(4)) -assert b"bla" assert bytes(3) +assert b"bla" assert bytes("bla", "utf8") try: bytes("bla") @@ -14,6 +14,9 @@ else: assert False + + + a = b"abcd" b = b"ab" c = b"abcd" @@ -49,6 +52,7 @@ # iter [i for i in b"abcd"] == ["a", "b", "c", "d"] +assert list(bytes(3)) == [0,0,0] # add assert a + b == b"abcdab" diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index 7504f2b749..fac093762a 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -61,11 +61,7 @@ impl PyByteInner { match_class!(ival.clone(), i @ PyInt => { let size = objint::get_value(&i.into_object()).to_usize().unwrap(); - let mut res: Vec = Vec::with_capacity(size); - for _ in 0..size { - res.push(0) - } - Ok(res)}, + Ok(vec![0; size])}, _l @ PyString=> {return Err(vm.new_type_error(format!( "string argument without an encoding" )));}, From 54c7335f1c32c0215b81f856cac26218a1223f5e Mon Sep 17 00:00:00 2001 From: Jimmy Girardet Date: Wed, 10 Apr 2019 18:05:28 +0200 Subject: [PATCH 18/19] SyntaxError for non ascii char --- parser/src/lexer.rs | 8 ++++++-- tests/snippets/bytes.py | 4 ---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index edf21c973c..561ea4d772 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -540,8 +540,12 @@ where let end_pos = self.get_pos(); let tok = if is_bytes { - Tok::Bytes { - value: string_content.as_bytes().to_vec(), + if string_content.is_ascii() { + Tok::Bytes { + value: string_content.as_bytes().to_vec(), + } + } else { + return Err(LexicalError::StringError); } } else { Tok::String { diff --git a/tests/snippets/bytes.py b/tests/snippets/bytes.py index 0a3521df33..816eae4951 100644 --- a/tests/snippets/bytes.py +++ b/tests/snippets/bytes.py @@ -11,10 +11,6 @@ bytes("bla") except TypeError: assert True -else: - assert False - - a = b"abcd" From 9d25a216de4d8240f72eb6f347228dc9b31269c1 Mon Sep 17 00:00:00 2001 From: jgirardet Date: Wed, 10 Apr 2019 21:39:16 +0200 Subject: [PATCH 19/19] fix some tests, run clippy --- tests/snippets/bytes.py | 12 ++++-------- vm/src/obj/objbyteinner.rs | 30 +++++++++++++++--------------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/tests/snippets/bytes.py b/tests/snippets/bytes.py index 816eae4951..0ffbc29f8c 100644 --- a/tests/snippets/bytes.py +++ b/tests/snippets/bytes.py @@ -7,10 +7,8 @@ assert bytes(3) assert b"bla" assert bytes("bla", "utf8") -try: +with assertRaises(TypeError): bytes("bla") -except TypeError: - assert True a = b"abcd" @@ -48,7 +46,7 @@ # iter [i for i in b"abcd"] == ["a", "b", "c", "d"] -assert list(bytes(3)) == [0,0,0] +assert list(bytes(3)) == [0, 0, 0] # add assert a + b == b"abcdab" @@ -62,10 +60,8 @@ assert b"dc" not in b"abcd" assert 97 in b"abcd" assert 150 not in b"abcd" -try: +with assertRaises(ValueError): 350 in b"abcd" -except ValueError: - pass # getitem @@ -113,7 +109,7 @@ assert bytes(b"Is Title Case").istitle() assert not bytes(b"is Not title casE").istitle() -# upper lower +# upper lower l = bytes(b"lower") b = bytes(b"UPPER") assert l.lower().islower() diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index fac093762a..532504b028 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -31,8 +31,8 @@ impl PyByteInner { if let OptionalArg::Present(eval) = val_option { if let Ok(input) = eval.downcast::() { if let Ok(encoding) = enc.clone().downcast::() { - if encoding.value.to_lowercase() == "utf8".to_string() - || encoding.value.to_lowercase() == "utf-8".to_string() + if &encoding.value.to_lowercase() == "utf8" + || &encoding.value.to_lowercase() == "utf-8" // TODO: different encoding { return Ok(PyByteInner { @@ -62,12 +62,10 @@ impl PyByteInner { i @ PyInt => { let size = objint::get_value(&i.into_object()).to_usize().unwrap(); Ok(vec![0; size])}, - _l @ PyString=> {return Err(vm.new_type_error(format!( - "string argument without an encoding" - )));}, + _l @ PyString=> {return Err(vm.new_type_error("string argument without an encoding".to_string()));}, obj => { - let elements = vm.extract_elements(&obj).or_else(|_| {return Err(vm.new_type_error(format!( - "cannot convert {} object to bytes", obj.class().name)));}); + let elements = vm.extract_elements(&obj).or_else(|_| {Err(vm.new_type_error(format!( + "cannot convert {} object to bytes", obj.class().name)))}); let mut data_bytes = vec![]; for elem in elements.unwrap(){ @@ -111,6 +109,10 @@ impl PyByteInner { self.elements.len() } + pub fn is_empty(&self) -> bool { + self.elements.len() == 0 + } + pub fn eq(&self, other: &PyByteInner, vm: &VirtualMachine) -> PyResult { if self.elements == other.elements { Ok(vm.new_bool(true)) @@ -169,10 +171,11 @@ impl PyByteInner { pub fn contains_bytes(&self, other: &PyByteInner, vm: &VirtualMachine) -> PyResult { for (n, i) in self.elements.iter().enumerate() { - if n + other.len() <= self.len() && *i == other.elements[0] { - if &self.elements[n..n + other.len()] == other.elements.as_slice() { - return Ok(vm.new_bool(true)); - } + if n + other.len() <= self.len() + && *i == other.elements[0] + && &self.elements[n..n + other.len()] == other.elements.as_slice() + { + return Ok(vm.new_bool(true)); } } Ok(vm.new_bool(false)) @@ -186,7 +189,7 @@ impl PyByteInner { Ok(vm.new_bool(false)) } } else { - Err(vm.new_value_error("byte mu st be in range(0, 256)".to_string())) + Err(vm.new_value_error("byte must be in range(0, 256)".to_string())) } } @@ -344,6 +347,3 @@ impl PyByteInner { .collect::>()) } } - -// TODO -// fix b"é" not allowed should be bytes("é", "utf8")